def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = ProjectedGradientDescent(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted PGD on Iris: %.2f%%',
                    (acc * 100))
Example #2
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            SVC: ScikitlearnSVC,
            LinearSVC: ScikitlearnSVC
        }

        (_, _), (x_test, y_test) = self.iris
        x_test_original = x_test.copy()

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

            # Test untargeted attack
            attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
            acc = np.sum(
                preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
            logger.info(
                'Accuracy of ' + classifier.__class__.__name__ +
                ' on Iris with PGD adversarial examples: '
                '%.2f%%', (acc * 100))

            # Test targeted attack
            targets = random_targets(y_test, nb_classes=3)
            attack = ProjectedGradientDescent(classifier,
                                              targeted=True,
                                              eps=1,
                                              eps_step=0.1)
            x_test_adv = attack.generate(x_test, **{'y': targets})
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(
                preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
            logger.info(
                'Success rate of ' + classifier.__class__.__name__ +
                ' on targeted PGD on Iris: %.2f%%', (acc * 100))

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - x_test))),
                                   0.0,
                                   delta=0.00001)
    def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Test PGD with np.inf norm
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)

        # Test PGD with 3 random initialisations
        attack = ProjectedGradientDescent(classifier, num_random_init=3)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info(
            'Accuracy on adversarial train examples with 3 random initialisations: %.2f%%',
            acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on adversarial test examples with 3 random initialisations: %.2f%%',
            acc * 100)
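The same accuracy bookkeeping appears in nearly every test on this page; it can be factored into one helper (a sketch, assuming one-hot labels as used above):

import numpy as np

def adversarial_accuracy(classifier, x_adv, y_one_hot):
    """Fraction of adversarial inputs the classifier still gets right."""
    preds = np.argmax(classifier.predict(x_adv), axis=1)
    return np.sum(preds == np.argmax(y_one_hot, axis=1)) / y_one_hot.shape[0]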
Example #4
    def __init__(self,
                 model,
                 targeted=False,
                 step_size_iter=.1,
                 max_perturbation=.3,
                 norm_order=np.inf,
                 max_iterations=100,
                 num_random_init=0,
                 batch_size=16):
        super().__init__(model=model)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._norm_order = norm_order
        self._max_iterations = max_iterations
        self._num_random_init = num_random_init
        self._method = ProjectedGradientDescent(
            classifier=self.model,
            targeted=self._targeted,
            norm=self._norm_order,
            eps=self._max_perturbation,
            eps_step=self._step_size_iter,
            max_iter=self._max_iterations,
            num_random_init=self._num_random_init,
            batch_size=batch_size)
    def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128):
        self.wrapped_pytorch_model = wrapModel(model, loss_criterion)
        self.norm = norm
        self.batch_size = batch_size
        self.attack = ProjectedGradientDescent(self.wrapped_pytorch_model,
                                               norm=norm,
                                               random_init=False,
                                               batch_size=batch_size)

        # Use GPU for computation if it is available
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
    def test_pytorch_iris_pt(self):
        classifier = get_tabular_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%",
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = ProjectedGradientDescent(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(
            targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Success rate of targeted PGD on Iris: %.2f%%",
                    (acc * 100))
    def test_classifier_type_check_fail_classifier(self):
        # Use a useless test classifier to test basic classifier properties
        class ClassifierNoAPI:
            pass

        classifier = ClassifierNoAPI
        with self.assertRaises(TypeError) as context:
            _ = ProjectedGradientDescent(classifier=classifier)

        self.assertIn(
            'For `ProjectedGradientDescent` classifier must be an instance of '
            '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
            '(<class \'object\'>,).', str(context.exception))
import torch
from art.attacks import ProjectedGradientDescent
from art.classifiers import PyTorchClassifier


def pgd_linf(model, X, y, optimizer, epsilon=0.1):
    """Construct PGD adversarial examples on the examples X."""
    # Wrap the PyTorch model for ART. The original referenced the undefined
    # names model_concetenate and custom_loss; the passed-in model and a
    # standard cross-entropy loss are assumed here instead.
    classifier = PyTorchClassifier(
        model=model,
        loss=torch.nn.CrossEntropyLoss(),
        optimizer=optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
        device_type='gpu',
    )
    attack = ProjectedGradientDescent(classifier=classifier, eps=epsilon)

    X_adv = attack.generate(X.numpy(), y=y.numpy())
    return torch.Tensor(X_adv)
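For context, a minimal sketch of how pgd_linf might be called on one torch batch; model, optimizer, and test_dataset are placeholders, and the model is assumed to live on the CPU for brevity:

import torch
from torch.utils.data import DataLoader

# Hypothetical usage; batches must match the (1, 28, 28) input_shape above.
loader = DataLoader(test_dataset, batch_size=64)
X, y = next(iter(loader))
X_adv = pgd_linf(model, X, y, optimizer, epsilon=0.1)
with torch.no_grad():
    adv_acc = (model(X_adv).argmax(dim=1) == y).float().mean().item()
print('Adversarial batch accuracy: {:.2%}'.format(adv_acc))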
    def test_classifier_type_check_fail_gradients(self):
        # Use a test classifier not providing gradients required by white-box attack
        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
        from sklearn.tree import DecisionTreeClassifier

        classifier = ScikitlearnDecisionTreeClassifier(
            model=DecisionTreeClassifier())
        with self.assertRaises(TypeError) as context:
            _ = ProjectedGradientDescent(classifier=classifier)

        self.assertIn(
            'For `ProjectedGradientDescent` classifier must be an instance of '
            '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
            '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).',
            str(context.exception))
Example #10
def train_model(model,
                train_generator,
                val_generator,
                save_path,
                nb_epochs=20,
                adv_eps=0.0,
                adv_frac=0.5,
                **kwargs):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    best_acc = 0
    attacker = None
    if adv_eps > 0:
        classifier = cnn_model.SmoothedPytorchClassifier(
            model,
            nb_classes=40,
            loss=torch.nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(model.parameters(), lr=0.0001),
            input_shape=(cnn_model.WINDOW_LENGTH, ))
        attacker = ProjectedGradientDescent(
            classifier,
            eps=adv_eps,
            eps_step=adv_eps / 5,
            max_iter=10,
            batch_size=train_generator.batch_size)
        #attacker = FastGradientMethod(classifier,eps=adv_eps,batch_size=train_generator.batch_size)
    for i in range(nb_epochs):
        logger.info("Epoch %d" % i)
        fit(model,
            train_generator,
            optimizer,
            adversarial_attacker=attacker,
            adversarial_frac=adv_frac,
            **kwargs)
        loss, nat_acc, adv_acc = eval_benign(model,
                                             val_generator,
                                             adversarial_attacker=attacker,
                                             niters=1)
        logger.info("Validation loss : %f" % loss)
        logger.info("Validation accuracy : %f" % nat_acc)
        acc = nat_acc
        if adv_eps > 0:
            logger.info("Adversarial accuracy : %f" % adv_acc)
            acc = adv_acc
        if acc > best_acc:
            best_acc = acc
            logger.info("Saving model")
            torch.save(model.state_dict(), save_path)
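train_model passes adversarial_attacker and adversarial_frac into a fit helper that is not shown here. Purely as an illustration of the mixing logic, a sketch of how a fraction of each numpy batch might be swapped for adversarial counterparts (mix_adversarial and its placement inside fit are assumptions):

import numpy as np

def mix_adversarial(x_batch, y_batch, attacker, frac):
    """Replace a random fraction of a batch with adversarial counterparts."""
    if attacker is None or frac <= 0:
        return x_batch
    n_adv = int(frac * len(x_batch))
    idx = np.random.choice(len(x_batch), n_adv, replace=False)
    x_mixed = x_batch.copy()
    x_mixed[idx] = attacker.generate(x_batch[idx], y=y_batch[idx])
    return x_mixed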
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Generate adversarial samples with the given classifier and time the run
    """
    t_start = time.time()

    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
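A hypothetical call, with classifier and x_test assumed from context:

# Hypothetical usage: craft PGD samples and report how long it took.
test_adv, dt = GetAttackers(classifier, x_test, "PGD")
print('Crafted {} adversarial samples in {:.1f}s'.format(len(test_adv), dt))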
    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%',
                    (acc * 100))
Example #13
    def evaluate_pgd(self, data_loader, num_iter=40):
        """Adversarial evaluation by PGD"""
        norm, eps = np.inf, attack_configs['PGD'][self.dataset]['epsilon']
        eps_step = 2 * eps / num_iter
        adv_crafter = ProjectedGradientDescent(self.classifier,
                                               norm=norm,
                                               eps=eps,
                                               eps_step=eps_step,
                                               max_iter=num_iter,
                                               random_init=True)

        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc
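evaluate_pgd scores only the first batch drawn from the loader. A sketch of the same evaluation averaged over every batch, reusing the snippet's own names (including its random_init argument, which may differ across ART versions):

    def evaluate_pgd_full(self, data_loader, num_iter=40):
        """Sketch: adversarial accuracy averaged over the whole loader."""
        norm, eps = np.inf, attack_configs['PGD'][self.dataset]['epsilon']
        adv_crafter = ProjectedGradientDescent(self.classifier,
                                               norm=norm,
                                               eps=eps,
                                               eps_step=2 * eps / num_iter,
                                               max_iter=num_iter,
                                               random_init=True)
        correct, total = 0, 0
        for examples, labels in data_loader:
            examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
            labels_one_hot = np.eye(self.nb_classes)[labels]
            examples_adv = adv_crafter.generate(examples, y=labels_one_hot)
            preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
            correct += np.sum(preds == labels)
            total += labels.shape[0]
        return correct / total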
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            SVC: ScikitlearnSVC,
            LinearSVC: ScikitlearnSVC,
        }

        (_, _), (x_test, y_test) = self.iris
        x_test_original = x_test.copy()

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

            # Test untargeted attack
            attack = ProjectedGradientDescent(classifier,
                                              eps=1,
                                              eps_step=0.1,
                                              max_iter=5)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((np.array(x_test) == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(np.array(y_test),
                                        axis=1) == preds_adv).all())
            acc = np.sum(
                preds_adv == np.argmax(np.array(y_test), axis=1)) / len(y_test)
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ +
                " on Iris with PGD adversarial examples: "
                "%.2f%%",
                (acc * 100),
            )
Example #15
def pgd_adv_train(model, data, outpath, model_name):
    attack = ProjectedGradientDescent(model,
                                      eps=0.015,
                                      eps_step=0.001,
                                      max_iter=2,
                                      targeted=False,
                                      num_random_init=0,
                                      )

    adv_trainer = AdversarialTrainer(model,
                                     attacks=attack,
                                     ratio=1.0)
    print('>>> Processing adversarial training, it will take a while...')
    x_train, y_train = data
    adv_trainer.fit(x_train, y_train, nb_epochs=30, batch_size=32)

    savefile = os.path.join(outpath, model_name)
    print('>>>Save the model to [{}]'.format(savefile))
    adv_trainer.classifier.save(savefile)

    return adv_trainer.classifier
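A hypothetical call, assuming an ART classifier wrapper and numpy training data (all names here are placeholders):

# Hypothetical usage; classifier, x_train, y_train come from elsewhere.
robust_classifier = pgd_adv_train(classifier,
                                  data=(x_train, y_train),
                                  outpath='./models',
                                  model_name='cnn_pgd_adv')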
    def test_pytorch_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_tabular_classifier_pt()

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((np.array(x_test) == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(np.array(y_test),
                                    axis=1) == preds_adv).all())
        acc = np.sum(
            preds_adv == np.argmax(np.array(y_test), axis=1)) / len(y_test)
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%",
                    (acc * 100))
Example #17
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    if method == 'deepfool':
        adv_crafter = DeepFool(wmodel)
    elif method == 'bim':
        adv_crafter = BasicIterativeMethod(wmodel, batch_size=batch_size)
    elif method == 'jsma':
        adv_crafter = SaliencyMapMethod(wmodel, batch_size=batch_size)
    elif method == 'cw2':
        adv_crafter = CarliniL2Method(wmodel, batch_size=batch_size)
    elif method == 'cwi':
        adv_crafter = CarliniLInfMethod(wmodel, batch_size=batch_size)
    elif method == 'fgsm':
        adv_crafter = FastGradientMethod(wmodel, batch_size=batch_size)
    elif method == 'pgd':
        adv_crafter = ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    return adv_crafter
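A hypothetical call, assuming a trained torch model and MNIST-shaped inputs (names are placeholders):

import torch.nn as nn

# Hypothetical usage; model, optimizer, x_test come from elsewhere.
adv_crafter = build_adversarial(model,
                                optimizer,
                                loss=nn.CrossEntropyLoss(),
                                input_shape=(1, 28, 28),
                                nb_class=10,
                                method='pgd',
                                pgd_eps=0.3)
x_test_adv = adv_crafter.generate(x_test)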
    def test_keras_iris_unbounded(self):
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.2,
                                          max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%",
                    (acc * 100))
Example #19
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Generate adversarial samples with the given classifier and time the run
    """
    t_start = time.time()

    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=.5, gamma=1.)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=1,
                                            eps=1,
                                            eps_step=0.5,
                                            max_iter=100,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=1)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
Example #20
def test_robust(opt, model, classifier, attack_method, c, norm=None):
    if opt.attack == 'FGSM':
        adv_crafter = FastGradientMethod(classifier,
                                         norm=norm,
                                         eps=c,
                                         targeted=False,
                                         num_random_init=0,
                                         batch_size=opt.bs)
    if opt.attack == 'PGD':
        adv_crafter = ProjectedGradientDescent(classifier,
                                               norm=norm,
                                               eps=c,
                                               eps_step=c / 10.,
                                               max_iter=10,
                                               targeted=False,
                                               num_random_init=1,
                                               batch_size=opt.bs)
    if opt.attack == 'BIM':
        adv_crafter = ProjectedGradientDescent(classifier,
                                               norm=norm,
                                               eps=c,
                                               eps_step=c / 10.,
                                               max_iter=10,
                                               targeted=False,
                                               num_random_init=0,
                                               batch_size=opt.bs)
    if opt.attack == 'JSMA':
        adv_crafter = SaliencyMapMethod(classifier,
                                        theta=0.1,
                                        gamma=c,
                                        batch_size=opt.bs)
    if opt.attack == 'CW':
        adv_crafter = cw.L2Adversary(targeted=False,
                                     confidence=0.01,
                                     c_range=(c, 1e10),
                                     max_steps=1000,
                                     abort_early=False,
                                     search_steps=5,
                                     box=(0., 1.0),
                                     optimizer_lr=0.01)

    correct = 0
    total = 0
    total_sum = 0
    common_id = []
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        output = classifier.predict(inputs.cpu().numpy(), batch_size=opt.bs)
        output = torch.tensor(output)
        output = output.cuda()
        init_pred = output.max(1, keepdim=False)[1]
        common_id = np.where(
            init_pred.cpu().numpy() == targets.cpu().numpy())[0]

        if opt.attack == 'CW':
            x_test_adv = adv_crafter(model, inputs, targets, to_numpy=True)
        else:
            x_test_adv = adv_crafter.generate(x=inputs.cpu().numpy())

        perturbed_output = classifier.predict(x_test_adv)
        perturbed_output = torch.tensor(perturbed_output)
        perturbed_output = perturbed_output.cuda()
        final_pred = perturbed_output.max(1, keepdim=False)[1]
        total_sum += targets.size(0)
        total += len(common_id)
        correct += final_pred[common_id].eq(
            targets[common_id].data).cpu().sum()
        attack_acc = 100. * float(correct) / total

        progress.progress_bar(
            batch_idx, len(testloader),
            'Attack Strength:%.3f, robust accuracy: %.3f%% (%d/%d)'
            '' % (c, attack_acc, correct, total))
    def _test_backend_mnist(self, classifier, x_train, y_train, x_test,
                            y_test):
        x_test_original = x_test.copy()

        # Test PGD with np.inf norm
        attack = ProjectedGradientDescent(classifier,
                                          eps=1,
                                          eps_step=0.1,
                                          max_iter=5)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / len(y_train)
        logger.info("Accuracy on adversarial train examples: %.2f%%",
                    acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(np.array(y_test),
                                                        axis=1)) / len(y_test)
        logger.info("Accuracy on adversarial test examples: %.2f%%", acc * 100)

        # Test PGD with 3 random initialisations
        attack = ProjectedGradientDescent(classifier,
                                          num_random_init=3,
                                          max_iter=5)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / len(y_train)
        logger.info(
            "Accuracy on adversarial train examples with 3 random initialisations: %.2f%%",
            acc * 100)

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(np.array(y_test),
                                                        axis=1)) / len(y_test)
        logger.info(
            "Accuracy on adversarial test examples with 3 random initialisations: %.2f%%",
            acc * 100)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(
            np.max(np.abs(np.array(x_test_original) - np.array(x_test)))),
                               0.0,
                               delta=0.00001)
Example #22
datagen.fit(x_train)
art_datagen = KerasDataGenerator(
    datagen.flow(x=x_train, y=y_train, batch_size=batch_size, shuffle=True),
    size=x_train.shape[0],
    batch_size=batch_size,
)

# Create a toy Keras CNN architecture & wrap it under ART interface
classifier = KerasClassifier(build_model(),
                             clip_values=(0, 1),
                             use_logits=False)

# Create the attack for the adversarial trainer; a single PGD attack with random restarts crafts adv examples on the target model
pgd = ProjectedGradientDescent(classifier,
                               eps=8,
                               eps_step=2,
                               max_iter=10,
                               num_random_init=20)

# Create some adversarial samples for evaluation
x_test_pgd = pgd.generate(x_test)

# Create adversarial trainer and perform adversarial training
adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0)
adv_trainer.fit_generator(art_datagen, nb_epochs=83)

# Evaluate the adversarially trained model on clean test set
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print("Accuracy test set: %.2f%%" %
      (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))
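The script above crafts x_test_pgd before training but reports only clean accuracy afterwards; a matching adversarial evaluation, in the same style:

# Evaluate the adversarially trained model on the PGD samples crafted above
labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
print("Accuracy on PGD test set: %.2f%%" %
      (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

Note that x_test_pgd was generated against the untrained weights; regenerating it after adv_trainer.fit_generator would give the stronger white-box measurement.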
Example #23
        print('Accuracy on adversarial test examples: {}%'.format(accuracy *
                                                                  100))

        adv_crafter_deepfool = DeepFool(classifier,
                                        batch_size=batch_size,
                                        epsilon=epsilon)
        x_test_adv = adv_crafter_deepfool.generate(x=x_test / 255.0)
        predictions = classifier.predict(x_test_adv * 255.0)
        print(np.argmax(predictions, axis=1))
        accuracy = np.sum(
            np.argmax(predictions, axis=1) == y_test) / len(y_test)
        print('Accuracy on adversarial test examples: {}%'.format(accuracy *
                                                                  100))
        # pgd 20
        adv_crafter_pgd_20 = ProjectedGradientDescent(classifier,
                                                      eps=epsilon,
                                                      eps_step=0.00775,
                                                      max_iter=20,
                                                      batch_size=batch_size)
        x_test_adv = adv_crafter_pgd_20.generate(x=x_test / 255.0)
        # print(x_test_adv)
        predictions = classifier.predict(x_test_adv * 255.0)
        accuracy = np.sum(
            np.argmax(predictions, axis=1) == y_test) / len(y_test)
        print('Accuracy on adversarial test examples: {}%'.format(accuracy *
                                                                  100))

        # C&W 20
        # adv_crafter_cwinf = CarliniLInfMethod(classifier, eps=epsilon, learning_rate=epsilon/10, max_iter=20, batch_size=batch_size)
        # x_test_adv = adv_crafter_cwinf.generate(x=x_test/255.0)

        # predictions = classifier.predict(x_test_adv*255.0)
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            # Test untargeted attack
            attack = ProjectedGradientDescent(classifier,
                                              eps=1,
                                              eps_step=0.1,
                                              max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris,
                                        axis=1) == preds_adv).all())
            acc = np.sum(preds_adv == np.argmax(
                self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ +
                " on Iris with PGD adversarial examples: "
                "%.2f%%",
                (acc * 100),
            )

            # Test targeted attack
            targets = random_targets(self.y_test_iris, nb_classes=3)
            attack = ProjectedGradientDescent(classifier,
                                              targeted=True,
                                              eps=1,
                                              eps_step=0.1,
                                              max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(preds_adv == np.argmax(
                targets, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Success rate of " + classifier.__class__.__name__ +
                " on targeted PGD on Iris: %.2f%%", (acc * 100))

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - self.x_test_iris))),
                                   0.0,
                                   delta=0.00001)
    predict = classifier.predict(x_test_adv)
    predict_classes = np.argmax(predict, axis=-1)
    target_names = ["Class {}".format(i) for i in range(CLASSES)]
    print(
        classification_report(y_test,
                              predict_classes,
                              target_names=target_names))
    accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
    print('Accuracy on CarliniLInfMethod test examples: {:.3f}%'.format(
        accuracy * 100))
    carlini_example = x_test_adv[example]

    print("*" * 100)
    attack = ProjectedGradientDescent(classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      eps_step=0.1,
                                      max_iter=100)
    x_test_adv = attack.generate(x_test)
    perturbation = np.mean(np.abs((x_test_adv - x_test)))
    print('Average perturbation: {:.10f}'.format(perturbation))
    predict = classifier.predict(x_test_adv)
    predict_classes = np.argmax(predict, axis=-1)
    target_names = ["Class {}".format(i) for i in range(CLASSES)]
    print(
        classification_report(y_test,
                              predict_classes,
                              target_names=target_names))
    accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
    print(
        'Accuracy on ProjectedGradientDescent[norm=inf] test examples: {:.3f}%'
        .format(accuracy * 100))
def general_test_v2(model,
                    optimizer,
                    input_shape,
                    nb_classes,
                    test_loader,
                    method,
                    conf,
                    btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
        assert isinstance(model, AttackPGD), 'Incorrect Model Configuration'
    model = model.model.eval()
    # model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    elif method == 'PGD':
        adv_crafter = ProjectedGradientDescent(warped_model, batch_size=32)

    correct, total = 0, 0

    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset,
                             batch_size=32,
                             shuffle=False,
                             drop_last=True)
    # temp_loader = test_loader

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images, conf)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
Example #27
def get_adversarial(targeted,
                    attack_name,
                    classifier,
                    xs,
                    target_ys,
                    batch_size,
                    dataset,
                    fgsm_epsilon=0,
                    cwl2_confidence=0):

    # The attack
    attack = None
    samples_range = xs.shape[0]

    #======================================
    if attack_name == 'FastGradientMethod':
        # norm=np.inf, eps=.3, eps_step=0.1, targeted=False, num_random_init=0, batch_size=1,minimal=False
        attack = FastGradientMethod(classifier=classifier,
                                    targeted=targeted,
                                    eps=fgsm_epsilon,
                                    batch_size=batch_size)
    #=====================================
    elif attack_name == 'CarliniLInfMethod':
        # confidence=0.0, targeted=False, learning_rate=0.01, max_iter=10, max_halving=5,
        #max_doubling=5, eps=0.3, batch_size=128
        attack = CarliniLInfMethod(classifier=classifier,
                                   max_iter=1000,
                                   targeted=targeted,
                                   batch_size=batch_size)
    #-------------------------------
    elif attack_name == 'UniversalPerturbation':
        # attacker='deepfool', attacker_params=None, delta=0.2,
        # max_iter=20, eps=10.0, norm=np.inf

        if targeted:
            print('UniversalPerturbation attack cannot be targeted.')
            exit()
        attack = UniversalPerturbation(classifier=classifier, max_iter=5)

    #==============================================
    elif attack_name == 'ProjectedGradientDescent':
        # norm=np.inf, eps=.3, eps_step=0.1, max_iter=100,
        # targeted=False, num_random_init=0, batch_size=1
        if dataset == 'mnist':
            attack = ProjectedGradientDescent(classifier=classifier,
                                              targeted=targeted,
                                              norm=1,
                                              eps=.3,
                                              eps_step=0.01,
                                              num_random_init=0,
                                              max_iter=40,
                                              batch_size=batch_size)
        else:
            attack = ProjectedGradientDescent(classifier=classifier,
                                              targeted=targeted,
                                              norm=1,
                                              eps=8.0,
                                              eps_step=2.0,
                                              num_random_init=0,
                                              max_iter=7,
                                              batch_size=batch_size)

    if targeted:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :],
                              y=target_ys[0:batch_size])  ###################
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(
                xs[ii:ii + batch_size, :, :, :],
                y=target_ys[ii:ii + batch_size])  ####################
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii

        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(
                last_samples,
                y=target_ys[last_ii + batch_size:])  ################
            adv = np.concatenate((adv, adv_samples), axis=0)
    else:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :])  ###################
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(
                xs[ii:ii + batch_size, :, :, :])  ####################
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii

        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples)  ################
            adv = np.concatenate((adv, adv_samples), axis=0)

    adv = np.asarray(adv)
    return adv
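The manual slicing loop above is not strictly necessary: the ART attacks used on this page accept a batch_size argument and generate iterates internally, so the loop can typically collapse to a single call (a sketch under that assumption):

# Sketch: rely on ART's internal batching instead of manual slicing.
if targeted:
    adv = attack.generate(xs, y=target_ys)
else:
    adv = attack.generate(xs)
adv = np.asarray(adv)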
Example #28
    predict = robust_cnn.predict(x_test_adv)
    predict_classes = np.argmax(predict, axis=-1)
    target_names = ["Class {}".format(i) for i in range(CLASSES)]
    print(
        classification_report(y_test,
                              predict_classes,
                              target_names=target_names))
    accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
    print('Accuracy on CarliniLInfMethod test examples: {:.3f}%'.format(
        accuracy * 100))
    carlini_example = x_test_adv[example]

    print("*" * 100)
    attack = ProjectedGradientDescent(robust_cnn,
                                      norm=np.inf,
                                      eps=0.03,
                                      eps_step=0.007,
                                      max_iter=100)
    x_test_adv = attack.generate(x_test)
    perturbation = np.mean(np.abs((x_test_adv - x_test)))
    print('Average perturbation: {:.10f}'.format(perturbation))
    predict = robust_cnn.predict(x_test_adv)
    predict_classes = np.argmax(predict, axis=-1)
    target_names = ["Class {}".format(i) for i in range(CLASSES)]
    print(
        classification_report(y_test,
                              predict_classes,
                              target_names=target_names))
    accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
    print(
        'Accuracy on ProjectedGradientDescent[norm=inf] test examples: {:.3f}%'
        .format(accuracy * 100))
Example #29
    # # training for MNIST
    # classifier = KerasClassifier(clip_values=(-0.5, 0.5), model=model, use_logits=False)
    # attack = ProjectedGradientDescent(classifier, eps=0.3, eps_step=0.01, max_iter=20, batch_size=128)

    # ## training for CIFAR
    # classifier = KerasClassifier(model=model, use_logits=False)
    # attack = ProjectedGradientDescent(classifier, eps=8/255, eps_step=2/255, max_iter=10, batch_size=512)

    ## training for SVHN
    classifier = KerasClassifier(clip_values=(-0.5, 0.5),
                                 model=model,
                                 use_logits=False)
    attack = ProjectedGradientDescent(classifier,
                                      eps=8 / 255,
                                      eps_step=1 / 255,
                                      max_iter=20,
                                      batch_size=512)

    x_test_pgd = attack.generate(x_test, y_test)
    # np.save('./data/' + dataset + '_data/model/' + model_name + '_y_' + attack_name + '.npy', x_test_pgd)

    # Evaluate the benign trained model on adv test set
    labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
    print('Accuracy on original PGD adversarial samples: %.2f%%' %
          (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

    trainer = AdversarialTrainer(classifier, attack, ratio=1.0)
    trainer.fit(x_train, y_train, nb_epochs=60, batch_size=1024)

    classifier.save(filename='adv_' + model_name + '.h5')
Example #30
predictions = classifier2.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model2 on shared test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model2 on shared test examples: {}%'.format(
    top_five_acc * 100))

# Define attack based on model1

if attack_choice == "fgsm":
    attack = FastGradientMethod(classifier=classifier1, eps=eps)
else:
    attack = ProjectedGradientDescent(classifier=classifier1,
                                      eps=eps,
                                      max_iter=adv_steps)

print()

print("generating adversarial examples...")

# generate adv examples for model1 based on shared data
x_test_adv = attack.generate(x=shared_x_test)

# test adv examples generated from model1 on model1
predictions = classifier1.predict(x_test_adv)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model1 on adversarial test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model1 on adversarial test examples: {}%'.format(
    top_five_acc * 100))
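The section breaks off before the transfer measurement this setup is building toward; a sketch of that final step, reusing the snippet's own accuracy and accuracy_n helpers so model2's robustness to examples crafted on model1 is scored the same way:

# Transfer check: adversarial examples crafted on model1, scored on model2
predictions = classifier2.predict(x_test_adv)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model2 on adversarial test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model2 on adversarial test examples: {}%'.format(
    top_five_acc * 100))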