Example #1
    def _to_art_classifier(
        classifier: Union[tf.keras.Model, torch.nn.Module],
        nb_classes: int,
        input_shape: Tuple[int, ...],
    ) -> Union[TensorFlowV2Classifier, PyTorchClassifier]:
        """Converts a classifier to an ART classifier.

        :param classifier: Classifier to be converted. Either a PyTorch or a TensorFlow classifier.
        :param nb_classes: Number of classes that were used to train the classifier.
        :param input_shape: Input shape of a data point of the classifier.
        :return: Given classifier converted to an ART classifier.
        :raises TypeError: If the given classifier is of an invalid type.
        """
        if isinstance(classifier, torch.nn.Module):
            return PyTorchClassifier(
                model=classifier,
                loss=None,
                nb_classes=nb_classes,
                input_shape=input_shape,
            )
        if isinstance(classifier, tf.keras.Model):
            return TensorFlowV2Classifier(
                model=classifier,
                nb_classes=nb_classes,
                input_shape=input_shape,
            )
        raise TypeError(
            f"Expected classifier to be an instance of {torch.nn.Module} or "
            f"{tf.keras.Model}, received {type(classifier)} instead."
        )
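A minimal usage sketch for the converter above, assuming it is accessible as a plain function; the two-layer model, class count, and input shape are illustrative, not from the source:

import numpy as np
import torch

model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(28 * 28, 10))
art_classifier = _to_art_classifier(model, nb_classes=10, input_shape=(1, 28, 28))
# predict() works without a loss; fit() would require one.
predictions = art_classifier.predict(np.zeros((1, 1, 28, 28), dtype=np.float32))
assert predictions.shape == (1, 10)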
Example #2
def test_generate(art_warning):
    try:

        x_train = np.ones((2, 12, 299, 299, 3)).astype(np.float32)
        y_train = np.zeros((2, 101))
        y_train[:, 1] = 1

        model = Model()
        classifier = PyTorchClassifier(model=model,
                                       loss=None,
                                       input_shape=x_train.shape[1:],
                                       nb_classes=y_train.shape[1],
                                       clip_values=(0, 1))
        attack = OverTheAirFlickeringPyTorch(classifier=classifier,
                                             max_iter=1,
                                             verbose=False)

        x_train_adv = attack.generate(x=x_train, y=y_train)

        assert x_train.shape == x_train_adv.shape
        assert np.min(x_train_adv) >= 0.0
        assert np.max(x_train_adv) <= 1.0

    except ARTTestException as e:
        art_warning(e)
Example #3
def main(args):
    print('==> Loading data..')
    if args['dataset'] == 'mnist':
        (_, _), (x_test,
                 y_test), min_pixel_value, max_pixel_value = load_mnist()
        input_shape = (1, 28, 28)
    else:
        (_, _), (x_test,
                 y_test), min_pixel_value, max_pixel_value = load_cifar10()
        input_shape = (3, 32, 32)

    x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

    print('==> Loading model..')
    model = loadmodel(args)
    model = model.cuda()
    model = model.eval()

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=criterion,
        optimizer=optimizer,
        input_shape=input_shape,
        nb_classes=10,
    )

    predictions = classifier.predict(x_test[:args['n_samples']])
    clean_accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(
            y_test[:args['n_samples']], axis=1)) / len(
                y_test[:args['n_samples']])
    print("Accuracy on benign test examples: {}%".format(clean_accuracy * 100))

    print("==> Evaluate the classifier on adversarial test examples")
    queries = [100, 200, 500]
    acc = attackmodel(args, classifier, x_test[:args['n_samples']],
                      y_test[:args['n_samples']], queries)
    np.save("./pgd_results/" + args['dataset'] + args['save'], np.array(acc))
    print("The adjusted accuracies are:")
    print(acc)
Example #4
File: utils.py  Project: srm-mic/Fluorine
def getClassifier(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(0, 1),
        loss=criterion,
        optimizer=optimizer,
        input_shape=(3, 224, 224),
        nb_classes=1000,
    )
    return classifier
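A hedged usage sketch for the helper above; the torchvision model is an illustrative stand-in for whatever model the project actually passes in:

import numpy as np
import torchvision

model = torchvision.models.resnet18(num_classes=1000).eval()
classifier = getClassifier(model)
predictions = classifier.predict(np.zeros((1, 3, 224, 224), dtype=np.float32))
print(predictions.shape)  # (1, 1000)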
Example #5
def attack_FGSM_nontargeted(dataloader, model, model_info, args,
                            checkpoint_dir):
    """
    FGSM attack
    """
    device = args.device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    img_size = model_info["model_img_size"]
    n_classes = model_info["num_classes"]

    classifier = PyTorchClassifier(
        model=model,
        loss=criterion,
        clip_values=(0.0, 1.0),
        optimizer=optimizer,
        input_shape=(img_size, img_size),
        nb_classes=n_classes,
        device_type=device,
    )

    attack = FastGradientMethod(estimator=classifier,
                                batch_size=args.batch_size)

    # Launching a non-targeted attack
    # t = args.target_class
    print(f"Launching FGSM nontargeted attack")
    dest_images = os.path.join(checkpoint_dir, args.model_name)
    os.makedirs(dest_images, exist_ok=True)

    # Running over the entire-batch to compute a universal perturbation
    for data in tqdm(dataloader):
        sample, label, img_path = data
        sample = sample.to(device)
        # Launch attack
        sample_adv = attack.generate(x=sample.cpu().numpy())

        # Code to save these images
        img_path = [it.split("/")[-1] for it in img_path]

        for i in range(len(sample_adv)):
            _img = sample_adv[i].transpose(1, 2, 0)
            skimage.io.imsave(os.path.join(dest_images, img_path[i]),
                              img_as_ubyte(_img))

    with open(os.path.join(dest_images, "stats.txt"), "w") as f:
        f.write(f"Fooling-rate was nan\n")

    return dest_images
Example #6
def test_general_iris_nn(iris_dataset):
    """
    Check whether the produced adversaries are correct,
    given Neural Network classifier and iris flower dataset.
    """
    (x_train, y_train, x_valid, y_valid), _, clip_values = iris_dataset

    x = Variable(torch.FloatTensor(np.array(x_train)))
    y = Variable(torch.FloatTensor(np.eye(3)[y_train]))

    neural_network = NeuralNetwork()
    nn_model_irises = neural_network.get_nn_model(4, 3, 10)
    neural_network.train_nn(nn_model_irises, x, y, 1e-4, 1000)

    est_nn_iris = PyTorchClassifier(model=nn_model_irises,
                                    loss=neural_network.loss_fn,
                                    input_shape=(4, ),
                                    nb_classes=3,
                                    clip_values=clip_values)

    lpf_nn = LowProFool(classifier=est_nn_iris,
                        eta=5,
                        lambd=0.2,
                        eta_decay=0.9)

    lpf_nn.fit_importances(x_valid, y_valid)

    target = np.eye(3)[np.array(
        y_valid.apply(
            lambda x: np.random.choice([i for i in range(3) if i != x])))]

    # Use of LowProFool
    adversaries = lpf_nn.generate(x=x_valid, y=target)
    expected = np.argmax(target, axis=1)

    x = Variable(torch.from_numpy(adversaries.astype(np.float32)))
    predicted = np.argmax(nn_model_irises.forward(x).detach().numpy(), axis=1)

    # Test
    correct = expected == predicted
    success_rate = np.sum(correct) / correct.shape[0]
    expected = 0.75
    logger.info(
        "[Irises, PyTorch neural network] success rate of adversarial attack (expected >{:.2f}): "
        "{:.2f}%".format(expected * 100, success_rate * 100))
    assert success_rate > expected
Example #7
def create_classifier_art():
    in_chans = 1
    extras = dict(in_chans=in_chans)
    model = timm.create_model("resnet50",
                              pretrained=True,
                              num_classes=10,
                              **extras)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    classifier = PyTorchClassifier(
        model=model,
        clip_values=(0, 1),
        loss=criterion,
        optimizer=optimizer,
        input_shape=(in_chans, 32, 32),
        nb_classes=10,
    )

    return classifier
Example #8
def test_get_loss_gradients(art_warning):
    try:

        x_train = np.ones((2, 12, 299, 299, 3)).astype(np.float32)
        y_train = np.zeros((2, 101))
        y_train[:, 1] = 1

        model = Model()
        classifier = PyTorchClassifier(
            model=model, loss=None, input_shape=x_train.shape[1:], nb_classes=y_train.shape[1]
        )
        attack = OverTheAirFlickeringPyTorch(classifier=classifier, verbose=False)

        gradients = attack._get_loss_gradients(
            x=torch.from_numpy(x_train), y=torch.from_numpy(y_train), perturbation=torch.zeros(x_train.shape)
        )

        assert gradients.shape == (2, 12, 1, 1, 3)

    except ARTTestException as e:
        art_warning(e)
Example #9
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    model.train()
    print('epoch ' + str(epoch))

    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()
    toPilImage = transforms.ToPILImage()    # transform tensor into PIL image to save

    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)


        # gauss noise training
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd
        # x_noise = x + torch.randn_like(x, device=device) * args.noise_sd

        # targeted noise training
        tmp_criterion = nn.CrossEntropyLoss()
        tmp_optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        classifier = PyTorchClassifier(
            model=model,
            clip_values=(min_pixel_value, max_pixel_value),
            loss=tmp_criterion,
            optimizer=tmp_optimizer,
            input_shape=(3, 32, 32),
            nb_classes=10,
        )

        # all other classes
        targets = []
        y_np = y.cpu().numpy()
        for i in range(y.shape[0]):
            targets.append(np.expand_dims(np.random.permutation(np.delete(np.arange(get_num_classes()), y_np[i])), axis=0))
        # print(targets[0].shape)
        targets = np.concatenate(targets)
        # print(targets.shape)
        # exit(0)

        mix_noise = torch.zeros_like(x)
        for t in range(targets.shape[1]):
            # generate random targets
            # targets = art.utils.random_targets(y.cpu().numpy(), get_num_classes())

            # calculate loss gradient
            # print(np.squeeze(targets[:,t]).shape)
            # exit()

            y_slice = np.squeeze(targets[:,t])
            y_oh = np.zeros((y_slice.size, get_num_classes()))
            y_oh[np.arange(y_slice.size), y_slice] = 1


            grad = classifier.loss_gradient(x=x.cpu().numpy(), y=y_oh) * (-1.0)
            scaled_grad = torch.Tensor(grad * args.eps_step).to(device)

            mix_noise += scaled_grad

            model.zero_grad()
            tmp_optimizer.zero_grad()

            # print((scaled_grad.shape, gauss_noise.shape, targets.shape))

        # combine noise and targeted noise
        x_combine = x + (gauss_noise * (1.0 - args.k_value)) + (mix_noise * args.k_value)

        model.zero_grad()

        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()       
        train_acc += acc

    scheduler.step()
    end = time.time()
    print('training time:', end - start, 'sec, loss:', train_loss / total, 'acc:', train_acc / total)
    return train_loss/total, train_acc/total
Example #10
# Step 2: Create the model

model = Net()

# Step 2a: Define the loss function and the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier

classifier = PyTorchClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

# Step 4: Train the ART classifier

classifier.fit(x_train, y_train, batch_size=64, nb_epochs=3)

# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 6: Generate adversarial test examples
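# The example is cut off here. A minimal sketch of Step 6 in the usual ART
# pattern (FastGradientMethod and eps=0.2 are illustrative choices, not from
# the source):

from art.attacks.evasion import FastGradientMethod

attack = FastGradientMethod(estimator=classifier, eps=0.2)
x_test_adv = attack.generate(x=x_test)

# Step 7: Evaluate the ART classifier on adversarial test examples

predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))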
Example #11
    weight_decay = 1e-2
    params = model.parameters()
    optimizer = torch.optim.SGD(params,
                                lr=lr_max,
                                momentum=0.9,
                                weight_decay=weight_decay)

    min_pixel_value = 0
    max_pixel_value = 1

    # Step 3: Create the ART classifier
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=criterion,
        optimizer=optimizer,
        input_shape=(3, 32, 32),
        nb_classes=10,
        preprocessing=(cifar_mu, cifar_std),
    )

    # Step 5: Evaluate the ART classifier on benign test examples

    #     normalized_x_test = normalize(x_test)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    #     accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print("=== Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Step 6: Generate adversarial test examples
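    # This example also breaks off at Step 6. A hedged sketch of that step
    # using PGD with CIFAR-10-style L-inf budgets (the eps values and
    # iteration count are illustrative):

    from art.attacks.evasion import ProjectedGradientDescent

    attack = ProjectedGradientDescent(estimator=classifier, eps=8 / 255, eps_step=2 / 255, max_iter=10)
    x_test_adv = attack.generate(x=x_test)
    predictions = classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("=== Accuracy on adversarial test examples: {}%".format(accuracy * 100))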
Example #12
    def __init__(
        self,
        estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
        norm: Union[int, float, str] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        nb_random_init: int = 5,
        batch_size: int = 32,
        loss_type: Optional[str] = None,
        verbose: bool = True,
    ):
        """
        Create a :class:`.AutoProjectedGradientDescent` instance.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param nb_random_init: Number of random initialisations within the epsilon ball. For nb_random_init=0
            the attack starts at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param loss_type: Defines the loss used for the attack. Available options: None (use the loss defined by
            the estimator), "cross_entropy", or "difference_logits_ratio".
        :param verbose: Show progress bars.
        """
        from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier

        if loss_type not in self._predefined_losses:
            raise ValueError(
                "The argument loss_type has an invalid value. The following options for `loss_type` are currently "
                "supported: {}".format(self._predefined_losses)
            )

        if loss_type is None:
            if hasattr(estimator, "predict") and is_probability(
                estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32))
            ):
                raise ValueError(
                    "AutoProjectedGradientDescent is expecting logits as estimator output, the provided "
                    "estimator seems to predict probabilities."
                )

            estimator_apgd = estimator
        else:
            if isinstance(estimator, TensorFlowClassifier):
                import tensorflow as tf

                if loss_type == "cross_entropy":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        raise NotImplementedError("Cross-entropy loss is not implemented for probability output.")

                    self._loss_object = tf.reduce_mean(
                        tf.keras.losses.categorical_crossentropy(
                            y_pred=estimator._output, y_true=estimator._labels_ph, from_logits=True
                        )
                    )

                elif loss_type == "difference_logits_ratio":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. "
                            "If loss_type='difference_logits_ratio' the estimator has to to predict logits."
                        )

                    raise ValueError(
                        "The loss `difference_logits_ratio` has not been validated completely. It seems that the "
                        "commented implementation below fails to select the second largest logit for cases "
                        "where the largest logit is the true logit. For future work `difference_logits_ratio` and "
                        "loss_fn should return the same loss value."
                    )

                    # def difference_logits_ratio(y_true, y_pred):
                    #     i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32)
                    #     i_y_pred_arg = tf.argsort(y_pred, axis=1)
                    #     # Not completely sure if the following line is correct.
                    #     # `i_y_pred_arg[:, -2], i_y_pred_arg[:, -1]` seems closer to the output of `loss_fn` than
                    #     # `i_y_pred_arg[:, -1], i_y_pred_arg[:, -2]`
                    #     i_z_i = tf.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -2],
                    #                      i_y_pred_arg[:, -1])
                    #
                    #     z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0)
                    #     z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0)
                    #     z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                    #     z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)
                    #
                    #     z_1 = tf.linalg.diag_part(z_1)
                    #     z_3 = tf.linalg.diag_part(z_3)
                    #     z_i = tf.linalg.diag_part(z_i)
                    #     z_y = tf.linalg.diag_part(z_y)
                    #
                    #     dlr = -(z_y - z_i) / (z_1 - z_3)
                    #
                    #     return tf.reduce_mean(dlr)
                    #
                    # def loss_fn(y_true, y_pred):
                    #     i_y_true = np.argmax(y_true, axis=1)
                    #     i_y_pred_arg = np.argsort(y_pred, axis=1)
                    #     i_z_i = np.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -1],
                    #                      i_y_pred_arg[:, -2])
                    #
                    #     z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                    #     z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                    #     z_i = y_pred[:, i_z_i]
                    #     z_y = y_pred[:, i_y_true]
                    #
                    #     z_1 = np.diag(z_1)
                    #     z_3 = np.diag(z_3)
                    #     z_i = np.diag(z_i)
                    #     z_y = np.diag(z_y)
                    #
                    #     dlr = -(z_y - z_i) / (z_1 - z_3)
                    #
                    #     return np.mean(dlr)
                    #
                    # self._loss_fn = loss_fn
                    # self._loss_object = difference_logits_ratio(y_true=estimator._labels_ph,
                    #                                             y_pred=estimator._output)

                estimator_apgd = TensorFlowClassifier(
                    input_ph=estimator._input_ph,
                    output=estimator._output,
                    labels_ph=estimator._labels_ph,
                    train=estimator._train,
                    loss=self._loss_object,
                    learning=estimator._learning,
                    sess=estimator._sess,
                    channels_first=estimator.channels_first,
                    clip_values=estimator.clip_values,
                    preprocessing_defences=estimator.preprocessing_defences,
                    postprocessing_defences=estimator.postprocessing_defences,
                    preprocessing=estimator.preprocessing,
                    feed_dict=estimator._feed_dict,
                )

            elif isinstance(estimator, TensorFlowV2Classifier):
                import tensorflow as tf

                if loss_type == "cross_entropy":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        self._loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
                    else:
                        self._loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
                elif loss_type == "difference_logits_ratio":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. "
                            "If loss_type='difference_logits_ratio' the estimator has to to predict logits."
                        )

                    class difference_logits_ratio:
                        def __init__(self):
                            self.reduction = "mean"

                        def __call__(self, y_true, y_pred):
                            i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32)
                            i_y_pred_arg = tf.argsort(y_pred, axis=1)
                            i_z_i_list = list()

                            for i in range(y_true.shape[0]):
                                if i_y_pred_arg[i, -1] != i_y_true[i]:
                                    i_z_i_list.append(i_y_pred_arg[i, -1])
                                else:
                                    i_z_i_list.append(i_y_pred_arg[i, -2])

                            i_z_i = tf.stack(i_z_i_list)

                            z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0)
                            z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0)
                            z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                            z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)

                            z_1 = tf.linalg.diag_part(z_1)
                            z_3 = tf.linalg.diag_part(z_3)
                            z_i = tf.linalg.diag_part(z_i)
                            z_y = tf.linalg.diag_part(z_y)

                            dlr = -(z_y - z_i) / (z_1 - z_3)

                            return tf.reduce_mean(dlr)

                    self._loss_fn = difference_logits_ratio()
                    self._loss_object = difference_logits_ratio()

                estimator_apgd = TensorFlowV2Classifier(
                    model=estimator.model,
                    nb_classes=estimator.nb_classes,
                    input_shape=estimator.input_shape,
                    loss_object=self._loss_object,
                    train_step=estimator._train_step,
                    channels_first=estimator.channels_first,
                    clip_values=estimator.clip_values,
                    preprocessing_defences=estimator.preprocessing_defences,
                    postprocessing_defences=estimator.postprocessing_defences,
                    preprocessing=estimator.preprocessing,
                )
            elif isinstance(estimator, PyTorchClassifier):
                import torch

                if loss_type == "cross_entropy":
                    if is_probability(
                        estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32))
                    ):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. If loss_type='cross_entropy' "
                            "the estimator has to to predict logits."
                        )

                    self._loss_object = torch.nn.CrossEntropyLoss(reduction="mean")
                elif loss_type == "difference_logits_ratio":
                    if is_probability(
                        estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=ART_NUMPY_DTYPE))
                    ):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. "
                            "If loss_type='difference_logits_ratio' the estimator has to to predict logits."
                        )

                    class difference_logits_ratio:
                        def __init__(self):
                            self.reduction = "mean"

                        def __call__(self, y_pred, y_true):  # type: ignore
                            if isinstance(y_true, np.ndarray):
                                y_true = torch.from_numpy(y_true)
                            if isinstance(y_pred, np.ndarray):
                                y_pred = torch.from_numpy(y_pred)

                            y_true = y_true.float()

                            i_y_true = torch.argmax(y_true, axis=1)
                            i_y_pred_arg = torch.argsort(y_pred, axis=1)
                            i_z_i_list = list()

                            for i in range(y_true.shape[0]):
                                if i_y_pred_arg[i, -1] != i_y_true[i]:
                                    i_z_i_list.append(i_y_pred_arg[i, -1])
                                else:
                                    i_z_i_list.append(i_y_pred_arg[i, -2])

                            i_z_i = torch.stack(i_z_i_list)

                            z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                            z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                            z_i = y_pred[:, i_z_i]
                            z_y = y_pred[:, i_y_true]

                            z_1 = torch.diagonal(z_1)
                            z_3 = torch.diagonal(z_3)
                            z_i = torch.diagonal(z_i)
                            z_y = torch.diagonal(z_y)

                            dlr = -(z_y - z_i) / (z_1 - z_3)

                            return torch.mean(dlr.float())

                    self._loss_object = difference_logits_ratio()

                estimator_apgd = PyTorchClassifier(
                    model=estimator.model,
                    loss=self._loss_object,
                    input_shape=estimator.input_shape,
                    nb_classes=estimator.nb_classes,
                    optimizer=None,
                    channels_first=estimator.channels_first,
                    clip_values=estimator.clip_values,
                    preprocessing_defences=estimator.preprocessing_defences,
                    postprocessing_defences=estimator.postprocessing_defences,
                    preprocessing=estimator.preprocessing,
                    device_type=estimator._device,
                )

            else:
                raise ValueError("The loss type {} is not supported for the provided estimator.".format(loss_type))

        super().__init__(estimator=estimator_apgd)
        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.nb_random_init = nb_random_init
        self.batch_size = batch_size
        self.loss_type = loss_type
        self.verbose = verbose
        self._check_params()
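A minimal sketch of invoking this constructor from user code, assuming a fitted logits-output PyTorchClassifier named classifier and test arrays x_test, y_test; the parameter values are illustrative:

import numpy as np
from art.attacks.evasion import AutoProjectedGradientDescent

attack = AutoProjectedGradientDescent(
    estimator=classifier,       # must output logits, not probabilities
    norm=np.inf,
    eps=0.3,
    eps_step=0.1,
    max_iter=100,
    targeted=False,
    nb_random_init=5,
    batch_size=32,
    loss_type="cross_entropy",  # or None / "difference_logits_ratio"
)
x_test_adv = attack.generate(x=x_test, y=y_test)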
Example #13
model = nn.Sequential(nn.Conv2d(1, 4, 5), nn.ReLU(), nn.MaxPool2d(2, 2),
                      nn.Conv2d(4, 10, 5), nn.ReLU(), nn.MaxPool2d(2, 2),
                      nn.Flatten(), nn.Linear(4 * 4 * 10, 100),
                      nn.Linear(100, 10))

# Step 2a: Define the loss function and the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier

classifier = PyTorchClassifier(
    model=model,
    clip_values=(0, 1),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

classifier.fit(x_train, y_train, batch_size=128, nb_epochs=5)

predictions = classifier.predict(x_test)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))


def calculate_l0(batch_original, batch_adversarial, dim):
    # image_original==x_test_adv
    matrix_bool = batch_original == batch_adversarial
Example #14
                class Feature(nn.Module):
                    def __init__(self, features):
                        super().__init__()
                        self.features = features

                    def forward(self, x):
                        return self.features(x)[0]

                fm_model = nn.Sequential(model._model.normalize,
                                         Feature(model._model.features),
                                         model._model.pool,
                                         model._model.flatten)
            classifier = PyTorchClassifier(
                model=fm_model,
                loss=model.criterion,
                input_shape=(3, 32, 32),
                nb_classes=model._model.classifier[0].in_features,
            )
            attack_model = MembershipInferenceAttackModel(
                num_classes=model.num_classes,
                num_features=model._model.classifier[0].in_features)
            attack = MembershipInferenceBlackBox(classifier,
                                                 attack_model=attack_model)
            x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
            x_train, y_train = to_numpy(
                torch.stack(x_train)), to_numpy(y_train)
            x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
            x_valid, y_valid = to_numpy(
                torch.stack(x_valid)), to_numpy(y_valid)

            x_train, y_train = x_train[:1000], y_train[:1000]
Example #15
    trojanvision.trainer.add_argument(parser)
    args = parser.parse_args()

    env = trojanvision.environ.create(**args.__dict__)
    dataset = trojanvision.datasets.create(**args.__dict__)
    model = trojanvision.models.create(dataset=dataset, **args.__dict__)

    if env['verbose']:
        summary(env=env, dataset=dataset, model=model)
    model._validate()
    print('\n\n')

    from art.estimators.classification import PyTorchClassifier  # type: ignore
    classifier = PyTorchClassifier(
        model=model._model,
        loss=model.criterion,
        input_shape=dataset.data_shape,
        nb_classes=model.num_classes,
    )
    x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
    x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)

    # valid_train, valid_valid = dataset.split_set(dataset.get_dataset('valid'), length=5000)
    # x_train, y_train = dataset_to_list(valid_train)
    # x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
    # valid_loader = dataset.get_dataloader('valid', dataset=valid_valid)

    # thieved_model._validate(print_prefix='Before Stealing', loader=valid_loader)
    # thieved_model._validate(print_prefix='After Stealing', loader=valid_loader)

    import art.attacks.extraction  # type:ignore
    for name in ['CopycatCNN', 'KnockoffNets']:
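        # The example is truncated here. A hedged sketch of how such a loop
        # typically continues with ART's extraction attacks; the thieved-model
        # setup and the hyperparameters below are illustrative, not from the
        # source.
        thieved_model = trojanvision.models.create(dataset=dataset, **args.__dict__)
        thieved_classifier = PyTorchClassifier(
            model=thieved_model._model,
            loss=thieved_model.criterion,
            input_shape=dataset.data_shape,
            nb_classes=thieved_model.num_classes,
            optimizer=torch.optim.Adam(thieved_model._model.parameters()),
        )
        attack_class = getattr(art.attacks.extraction, name)
        attack = attack_class(classifier, batch_size_fit=64, batch_size_query=64,
                              nb_epochs=10, nb_stolen=len(x_train))
        attack.extract(x_train, y_train, thieved_classifier=thieved_classifier)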
Example #16
def adv_train_loop(model,
                   params,
                   ds,
                   min_y,
                   base_data,
                   model_id,
                   attack_type,
                   device,
                   batch_size,
                   max_epochs=5):
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(
        model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(
        model)  # making a copy so that original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        attack = None

        #         if attack_type == "fgsm":
        #             attack = FastGradientMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "bim":
        #             attack = BasicIterativeMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "carlini":
        #             attack = CarliniLInfMethod(classifier=classifier)
        #         elif attack_type == "deepfool":
        #             attack = DeepFool(classifier=classifier)
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            it = (engine.state.iteration - 1) % len(ds_train) + 1
            if (it % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, it, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Example #17
def robustness_evaluation(
    object_storage_url,
    object_storage_username,
    object_storage_password,
    data_bucket_name,
    result_bucket_name,
    model_id,
    feature_testset_path="processed_data/X_test.npy",
    label_testset_path="processed_data/y_test.npy",
    clip_values=(0, 1),
    nb_classes=2,
    input_shape=(1, 3, 64, 64),
    model_class_file="model.py",
    model_class_name="model",
    LossFn="",
    Optimizer="",
    epsilon=0.2,
):

    url = re.compile(r"https?://")
    cos = Minio(
        url.sub("", object_storage_url),
        access_key=object_storage_username,
        secret_key=object_storage_password,
        secure=False,
    )

    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + "/_submitted_code/model.zip"

    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + "/" + weights_filename,
                    weights_filename)
    cos.fget_object(result_bucket_name, model_files, "model.zip")

    # Load PyTorch model definition from the source code.
    zip_ref = zipfile.ZipFile("model.zip", "r")
    zip_ref.extractall("model_files")
    zip_ref.close()

    modulename = "model_files." + model_class_file.split(".")[0].replace(
        "-", "_")
    """
    We required users to define where the model class is located or follow
    some naming convention we have provided.
    """
    model_class = getattr(importlib.import_module(modulename),
                          model_class_name)

    # load & compile model
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))

    # Define Loss and optimizer function for the PyTorch model
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # create pytorch classifier
    classifier = PyTorchClassifier(
        model=model,
        loss=loss_fn,
        optimizer=optimizer,
        input_shape=input_shape,
        nb_classes=nb_classes,
        clip_values=clip_values,
    )

    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)
    return metrics
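A sketch of the artifact layout the function above expects under its default arguments; the model.py contents are hypothetical and only mirror the defaults input_shape=(1, 3, 64, 64) and nb_classes=2:

# model.zip contains the class file at its root; it is extracted into
# model_files/, giving the module path "model_files.model":
#   model_files/
#       model.py      # defines a class named `model` (see model_class_name)
#
# model.py (illustrative):
import torch.nn as nn

class model(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 64 * 64, 2))

    def forward(self, x):
        return self.net(x)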
Example #18
def test_fgsm(adv_model, dataset, loss_fn, optimizer, batch_size=32, num_workers=20, device='cuda:0', attack='fgsm', **kwargs):
	
	"""
	Train the model with the given training data
	:param x:
	:param y:
	:param epochs:
	"""

	epsilons = [0.00001, 0.0001, 0.004, 0.01, 0.1, 1, 10, 100]
	label_dict = pkl.load(open('external/speaker2int_7323.pkl','rb'))

	extractor = mfcc_extractor(collate=False)
	adv_classifier = PyTorchClassifier(model=AdvModel(adv_model.cpu(), extractor.cpu()),
										loss=loss_fn,
										optimizer=optimizer,
										input_shape=[1, 32000],
										nb_classes=250)
	# Create Dataloader
	dataloader = DataLoader(dataset=dataset['eval'],
	  			batch_size=batch_size, 
				shuffle=False,
				num_workers=num_workers,
				collate_fn=PadBatch())

	n_iterations = len(dataloader)

	f_log_all, f_name_all = createLogFiles('all')
	with open(f_name_all, 'a+') as f_log_all:
		f_log_all.write("\n\n #################################### Begin #####################################")
		f_log_all.write("\n New Log: {}".format(datetime.now()))

	# Loop over all the training data for generator	
	n_files = 0
	accuracy = 0
	adv_acc_eps = {e: 0.0 for e in epsilons}
	success_eps = {e: 0.0 for e in epsilons}
	for i, (X, y, f) in enumerate(dataloader):
		
		if label_dict:
			y = torch.LongTensor([label_dict[y_] for y_ in y])

		# send data to the GPU
		y = y.to(device)

		x_mfccs, labels = extractor((X.to(device).transpose(1,2))), y
		clean_logits = adv_model.forward(x_mfccs)
		clean_class  = clean_logits.argmax(dim=-1)

		n_files 	 += len(X)
		tmp_accuracy = torch.sum(clean_class == y).detach().cpu()
		accuracy 	 += tmp_accuracy

		# Epsilon loop
		for e in epsilons:
	
			# Build the attack for this epsilon. Use a separate name so the
			# `attack` string parameter is not overwritten on the first pass.
			if attack == 'fgsm':
				attack_obj = FastGradientMethod(estimator=adv_classifier, eps=e)
			elif attack == 'bim':
				attack_obj = ProjectedGradientDescent(estimator=adv_classifier, eps=e, eps_step=e / 5, max_iter=100)

			X_fgsm = torch.Tensor(attack_obj.generate(x=X.numpy())).to(device)

			assert(len(X_fgsm) == len(X))

			pred_mfccs, labels_preds = extractor(X_fgsm.transpose(1,2)), y
			adv_logits = adv_model.forward(pred_mfccs)
			adv_class  = adv_logits.argmax(dim=-1)

			tmp_success = torch.sum(clean_class != adv_class).detach().cpu()
			tmp_adv_acc = torch.sum(y           == adv_class).detach().cpu()

			success_eps[e] += tmp_success
			adv_acc_eps[e] += tmp_adv_acc			

			# Update total loss and acc
			with open(f_name_all, 'a+') as f_log_all:
				f_log_all.write('File {}\tBatch {}\tEps {}\tTarg {}\tClean {}\tAdv {}\n'.format(
					f[0][-1], i+1, e, y.cpu().detach().numpy(), 
					clean_class.cpu().detach().numpy(),
					adv_class.cpu().detach().numpy()))
			
			for wav, fi in zip(X_fgsm, f):
				adv_path="samples/fgsm/{}".format(fi[-2])
				if not os.path.exists(adv_path):
					os.makedirs(adv_path)
				torchaudio.save("{}/{}_{}.wav".format(adv_path,fi[-1], e),  wav.squeeze().detach().cpu(), 8000)

			print("Epsilon: {}".format(e),
				  "Tmp Acc: {:.3f}".format((tmp_accuracy + 0.0) / len(X)),
				  "Tmp Adv: {:.3f}".format((tmp_adv_acc + 0.0)  / len(X)),
				  "Tmp Suc: {:.3f}".format((tmp_success + 0.0)  / len(X)))

	accuracy        = (accuracy + 0.0) / n_files
	adv_acc_eps     = {k : v / n_files for k, v in adv_acc_eps.items()}
	success_eps     = {k : v / n_files for k, v in success_eps.items()}


	with open(f_name_all, 'a+') as f_log_all:
		f_log_all.write('Epsilons: {} - Accuracy: {}%\tAdv Accuracy: {}%\tSuccess rate: {}%\n'.format(epsilons, accuracy, adv_acc_eps, success_eps))

	return
Example #19
    def __init__(
        self,
        estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
        norm: Union[int, float, str] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        nb_random_init: int = 5,
        batch_size: int = 32,
        loss_type: Optional[str] = None,
    ):
        """
        Create a :class:`.AutoProjectedGradientDescent` instance.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param nb_random_init: Number of random initialisations within the epsilon ball. For nb_random_init=0
            the attack starts at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        """
        from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier

        if isinstance(estimator, TensorFlowClassifier):
            import tensorflow as tf

            if loss_type == "cross_entropy":
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    raise NotImplementedError(
                        "Cross-entropy loss is not implemented for probability output."
                    )
                else:
                    self._loss_object = tf.reduce_mean(
                        tf.keras.losses.categorical_crossentropy(
                            y_pred=estimator._output,
                            y_true=estimator._labels_ph,
                            from_logits=True))

                    def loss_fn(y_true, y_pred):
                        y_pred_norm = y_pred - np.amax(
                            y_pred, axis=1, keepdims=True)
                        loss_value = -(y_true * y_pred_norm - np.log(
                            np.sum(np.exp(y_pred_norm), axis=1,
                                   keepdims=True)))
                        return np.mean(loss_value)

                    self._loss_fn = loss_fn
            elif loss_type == "difference_logits_ratio":
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' "
                        "the estimator has to to predict logits.")
                else:

                    def difference_logits_ratio(y_true, y_pred):
                        i_y_true = tf.cast(
                            tf.math.argmax(tf.cast(y_true, tf.int32), axis=1),
                            tf.int32)
                        i_y_pred_arg = tf.argsort(y_pred, axis=1)
                        i_z_i = tf.where(i_y_pred_arg[:, -1] != i_y_true[:],
                                         i_y_pred_arg[:, -2], i_y_pred_arg[:,
                                                                           -1])

                        z_1 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -1],
                                        axis=1,
                                        batch_dims=0)
                        z_3 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -3],
                                        axis=1,
                                        batch_dims=0)
                        z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                        z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)

                        z_1 = tf.linalg.diag_part(z_1)
                        z_3 = tf.linalg.diag_part(z_3)
                        z_i = tf.linalg.diag_part(z_i)
                        z_y = tf.linalg.diag_part(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return tf.reduce_mean(dlr)

                    def loss_fn(y_true, y_pred):
                        i_y_true = np.argmax(y_true, axis=1)
                        i_y_pred_arg = np.argsort(y_pred, axis=1)
                        i_z_i = np.where(i_y_pred_arg[:, -1] != i_y_true[:],
                                         i_y_pred_arg[:, -1], i_y_pred_arg[:,
                                                                           -2])

                        z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                        z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                        z_i = y_pred[:, i_z_i]
                        z_y = y_pred[:, i_y_true]

                        z_1 = np.diag(z_1)
                        z_3 = np.diag(z_3)
                        z_i = np.diag(z_i)
                        z_y = np.diag(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return np.mean(dlr)

                    self._loss_fn = loss_fn
                    self._loss_object = difference_logits_ratio(
                        y_true=estimator._labels_ph, y_pred=estimator._output)
            elif loss_type is None:
                self._loss_object = estimator._loss_object
            else:
                raise ValueError(
                    "The argument loss_type has an invalid value. The following options for loss_type are "
                    "supported: {}".format(
                        [None, "cross_entropy", "difference_logits_ratio"]))

            estimator_apgd = TensorFlowClassifier(
                input_ph=estimator._input_ph,
                output=estimator._output,
                labels_ph=estimator._labels_ph,
                train=estimator._train,
                loss=self._loss_object,
                learning=estimator._learning,
                sess=estimator._sess,
                channels_first=estimator.channels_first,
                clip_values=estimator.clip_values,
                preprocessing_defences=estimator.preprocessing_defences,
                postprocessing_defences=estimator.postprocessing_defences,
                preprocessing=estimator.preprocessing,
                feed_dict=estimator._feed_dict,
            )

        elif isinstance(estimator, TensorFlowV2Classifier):
            import tensorflow as tf

            if loss_type == "cross_entropy":
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    self._loss_object = tf.keras.losses.CategoricalCrossentropy(
                        from_logits=False)
                    self._loss_fn = self._loss_object
                else:
                    self._loss_object = tf.keras.losses.CategoricalCrossentropy(
                        from_logits=True)
                    self._loss_fn = self._loss_object
            elif loss_type == "difference_logits_ratio":
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' "
                        "the estimator has to to predict logits.")
                else:

                    def difference_logits_ratio(y_true, y_pred):
                        i_y_true = tf.cast(
                            tf.math.argmax(tf.cast(y_true, tf.int32), axis=1),
                            tf.int32)
                        i_y_pred_arg = tf.argsort(y_pred, axis=1)
                        i_z_i_list = list()

                        for i in range(y_true.shape[0]):
                            if i_y_pred_arg[i, -1] != i_y_true[i]:
                                i_z_i_list.append(i_y_pred_arg[i, -1])
                            else:
                                i_z_i_list.append(i_y_pred_arg[i, -2])

                        i_z_i = tf.stack(i_z_i_list)

                        z_1 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -1],
                                        axis=1,
                                        batch_dims=0)
                        z_3 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -3],
                                        axis=1,
                                        batch_dims=0)
                        z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                        z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)

                        z_1 = tf.linalg.diag_part(z_1)
                        z_3 = tf.linalg.diag_part(z_3)
                        z_i = tf.linalg.diag_part(z_i)
                        z_y = tf.linalg.diag_part(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return tf.reduce_mean(dlr)

                    self._loss_fn = difference_logits_ratio
                    self._loss_object = difference_logits_ratio
            elif loss_type is None:
                self._loss_object = estimator._loss_object
            else:
                raise ValueError(
                    "The argument loss_type has an invalid value. The following options for loss_type are "
                    "supported: {}".format(
                        [None, "cross_entropy", "difference_logits_ratio"]))

            estimator_apgd = TensorFlowV2Classifier(
                model=estimator.model,
                nb_classes=estimator.nb_classes,
                input_shape=estimator.input_shape,
                loss_object=self._loss_object,
                train_step=estimator._train_step,
                channels_first=estimator.channels_first,
                clip_values=estimator.clip_values,
                preprocessing_defences=estimator.preprocessing_defences,
                postprocessing_defences=estimator.postprocessing_defences,
                preprocessing=estimator.preprocessing,
            )
        elif isinstance(estimator, PyTorchClassifier):
            import torch

            if loss_type == "cross_entropy":
                if is_probability(
                        estimator.predict(
                            x=np.ones(shape=(1, *estimator.input_shape),
                                      dtype=np.float32))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='cross_entropy' "
                        "the estimator has to predict logits.")
                else:

                    def loss_fn(y_true, y_pred):
                        return torch.nn.CrossEntropyLoss()(
                            torch.from_numpy(y_pred),
                            torch.from_numpy(np.argmax(y_true, axis=1)))

                    self._loss_fn = loss_fn
                    self._loss_object = torch.nn.CrossEntropyLoss()
            elif loss_type == "difference_logits_ratio":
                if is_probability(
                        estimator.predict(
                            x=np.ones(shape=(1, *estimator.input_shape),
                                      dtype=ART_NUMPY_DTYPE))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' "
                        "the estimator has to predict logits.")
                else:

                    def difference_logits_ratio(y_pred,
                                                y_true):  # type: ignore
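                        # Same DLR loss as in the TensorFlow branch above,
                        # implemented with torch ops:
                        # DLR = -(z_y - z_i) / (z_pi1 - z_pi3).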
                        if isinstance(y_true, np.ndarray):
                            y_true = torch.from_numpy(y_true)
                        if isinstance(y_pred, np.ndarray):
                            y_pred = torch.from_numpy(y_pred)

                        y_true = y_true.float()


                        i_y_true = torch.argmax(y_true, dim=1)
                        i_y_pred_arg = torch.argsort(y_pred, dim=1)
                        i_z_i_list = list()

                        for i in range(y_true.shape[0]):
                            if i_y_pred_arg[i, -1] != i_y_true[i]:
                                i_z_i_list.append(i_y_pred_arg[i, -1])
                            else:
                                i_z_i_list.append(i_y_pred_arg[i, -2])

                        i_z_i = torch.stack(i_z_i_list)

                        z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                        z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                        z_i = y_pred[:, i_z_i]
                        z_y = y_pred[:, i_y_true]

                        z_1 = torch.diagonal(z_1)
                        z_3 = torch.diagonal(z_3)
                        z_i = torch.diagonal(z_i)
                        z_y = torch.diagonal(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return torch.mean(dlr.float())

                    self._loss_fn = difference_logits_ratio
                    self._loss_object = difference_logits_ratio
            elif loss_type is None:
                self._loss_object = estimator._loss_object
            else:
                raise ValueError(
                    "The argument loss_type has an invalid value. The following options for loss_type are "
                    "supported: {}".format(
                        [None, "cross_entropy", "difference_logits_ratio"]))

            estimator_apgd = PyTorchClassifier(
                model=estimator.model,
                loss=self._loss_object,
                input_shape=estimator.input_shape,
                nb_classes=estimator.nb_classes,
                optimizer=None,
                channels_first=estimator.channels_first,
                clip_values=estimator.clip_values,
                preprocessing_defences=estimator.preprocessing_defences,
                postprocessing_defences=estimator.postprocessing_defences,
                preprocessing=estimator.preprocessing,
                device_type=estimator._device,
            )

        else:
            raise ValueError(
                "The type of the provided estimator is not supported. The estimator has to be a "
                "TensorFlow v1/v2 or PyTorch classifier.")

        super().__init__(estimator=estimator_apgd)
        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.nb_random_init = nb_random_init
        self.batch_size = batch_size
        self.loss_type = loss_type
        self._check_params()
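
A minimal usage sketch for the attack configured by this constructor, assuming an ART classifier and test arrays built as in the other examples on this page (names and hyperparameters are illustrative):

from art.attacks.evasion import AutoProjectedGradientDescent

# loss_type="difference_logits_ratio" requires a logit-producing classifier,
# per the checks implemented above.
attack = AutoProjectedGradientDescent(estimator=classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      eps_step=0.1,
                                      max_iter=100,
                                      nb_random_init=5,
                                      batch_size=32,
                                      loss_type="difference_logits_ratio")
x_adv = attack.generate(x=x_test, y=y_test)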
Example #20
    def test_2_pt(self):
        """
        Test with a PyTorch Classifier.
        :return:
        """

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
        x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

        # Create a model from scratch
        class PyTorchModel(nn.Module):
            def __init__(self):
                super(PyTorchModel, self).__init__()
                self.conv_1 = nn.Conv2d(in_channels=1,
                                        out_channels=4,
                                        kernel_size=5,
                                        stride=1)
                self.conv_2 = nn.Conv2d(in_channels=4,
                                        out_channels=10,
                                        kernel_size=5,
                                        stride=1)
                self.fc_1 = nn.Linear(in_features=4 * 4 * 10, out_features=100)
                self.fc_2 = nn.Linear(in_features=100, out_features=10)

            def forward(self, x):
                x = F.relu(self.conv_1(x))
                x = F.max_pool2d(x, 2, 2)
                x = F.relu(self.conv_2(x))
                x = F.max_pool2d(x, 2, 2)
                x = x.view(-1, 4 * 4 * 10)
                x = F.relu(self.fc_1(x))
                x = self.fc_2(x)
                return x

        # Step 2a: Define the loss function and the optimizer
        model = PyTorchModel()
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Step 3: Create the ART classifier

        classifier = PyTorchClassifier(
            model=model,
            clip_values=(0, 1),
            loss=criterion,
            optimizer=optimizer,
            input_shape=(1, 28, 28),
            nb_classes=10,
        )

        # Initialize DPA Classifier
        dpa = DeepPartitionEnsemble(
            classifiers=classifier,
            ensemble_size=ENSEMBLE_SIZE,
            channels_first=classifier.channels_first,
            clip_values=classifier.clip_values,
            preprocessing_defences=classifier.preprocessing_defences,
            postprocessing_defences=classifier.postprocessing_defences,
            preprocessing=classifier.preprocessing,
        )

        # Check basic functionality of DPA Classifier
        # check predict
        y_test_dpa = dpa.predict(x=x_test)
        self.assertEqual(y_test_dpa.shape, y_test.shape)
        self.assertTrue((np.sum(y_test_dpa, axis=1) <= ENSEMBLE_SIZE * np.ones(
            (NB_TEST, ))).all())

        # loss gradient
        grad = dpa.loss_gradient(x=x_test, y=y_test, sampling=True)
        assert grad.shape == (10, 1, 28, 28)

        # fit
        dpa.fit(x=x_train, y=y_train)
Example #21
model = mobilenet_v2(num_classes=10)  # e.g. torchvision.models.mobilenet_v2

# Step 2a: Define the loss function and the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 3: Create the ART classifier

classifier = PyTorchClassifier(
    model=model,
    clip_values=(0.0, 1.0),
    preprocessing=(cifar_mu, cifar_std),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
)

# Step 4: Train the ART classifier
classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)
exp_time = time.strftime('%H_%M_%S')
# torch.save(classifier.model.state_dict(), 'pth/{}.pth.tar'.format(exp_time))
# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))
    def __init__(self,
                 estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
                 detector: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
                 detector_th: float = 0.5,
                 beta: float = 0.5,
                 detector_clip_fun=None,
                 norm: Union[int, float, str] = np.inf,
                 eps: float = 0.3,
                 eps_step: float = 0.1,
                 max_iter: int = 100,
                 targeted: bool = False,
                 nb_random_init: int = 5,
                 batch_size: int = 32,
                 loss_type: Optional[str] = None,
                 verbose: bool = True):
        """
        Create a :class:`.AutoProjectedGradientDescentDetectors` instance.

        :param estimator: A trained estimator.
        :param detector: A trained detector. Its prediction should be equal
        to 1 for samples predicted as malicious and 0 for samples
        predicted as benign.
        :param detector_th: Threshold chosen to obtain a desired number of
        false positives.
        :param beta: Constant which regulates the trade-off between
        optimizing the classifier loss and the detector loss. In
        particular, it is the weight given to the detector's loss.
        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param nb_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
            starting at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param verbose: Show progress bars.
        """
        from art.estimators.classification import PyTorchClassifier

        self.beta = beta
        self.detector_th = detector_th

        self.detector_clip_fun = detector_clip_fun

        if targeted is True:
            raise NotImplementedError("This attack does not yet work as a "
                                      "targeted attack; the objective "
                                      "function and its gradient would need "
                                      "small changes to support it.")

        if isinstance(detector, PyTorchClassifier):
            import torch

            if detector.clip_values is not None:
                raise ValueError("The clip_values of the detector must be "
                                 "None.")

            class detector_loss:
                """
                The detector loss is the detector score for the class 1
                - the detector threshold
                """
                def __init__(self):
                    self.reduction = "mean"

                def __call__(self, y_pred, y_true):  # type: ignore
                    """
                    y_pred are actually the logits.
                    y_true is actually unused.
                    """
                    if isinstance(y_pred, np.ndarray):
                        scores = torch.from_numpy(y_pred)
                    else:
                        scores = y_pred

                    # apply the softmax to map the scores into [0, 1]
                    scores = torch.softmax(scores, dim=1)

                    # consider the score assigned to the malicious class
                    scores = scores[:, 1]
                    scores = scores - detector_th

                    # create a vector of zeros
                    zero_vector = torch.zeros_like(scores)

                    # get the maximum values between scores - threshold and 0
                    scores = torch.max(scores, zero_vector)

                    if self.reduction == 'mean':
                        return torch.mean(scores)
                    else:
                        return scores

            self._det_loss_object = detector_loss()

            detector_apgd = PyTorchClassifier(
                model=detector.model,
                loss=self._det_loss_object,
                input_shape=detector.input_shape,
                nb_classes=detector.nb_classes,
                optimizer=None,
                channels_first=detector.channels_first,
                preprocessing_defences=detector.preprocessing_defences,
                postprocessing_defences=detector.postprocessing_defences,
                preprocessing=detector.preprocessing,
                device_type=detector._device,
            )


        else:
            raise ValueError("The type of the detector classifier is not "
                             "supported.")

        self.detector = detector_apgd

        super().__init__(estimator=estimator,
                         norm=norm,
                         eps=eps,
                         eps_step=eps_step,
                         max_iter=max_iter,
                         targeted=targeted,
                         nb_random_init=nb_random_init,
                         batch_size=batch_size,
                         loss_type=loss_type,
                         verbose=verbose)
Example #23
def run_attack_untargeted(file_model, X, y, att_name, eps, device):
    path = file_model.split('/')[0]
    file_str = file_model.split('/')[-1]
    name_arr = file_str.split('_')
    data = name_arr[0]
    model_name = name_arr[1]
    file_data = os.path.join(
        path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name,
                                      round(eps * 1000)))

    if os.path.exists(file_data):
        print('Found existing file:', file_data)
        obj = torch.load(file_data)
        return obj['adv'], obj['X'], obj['y']

    if data == 'mnist':
        n_features = (1, 28, 28)
        n_classes = 10
        model = BaseModel(use_prob=False).to(device)
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        n_classes = 10
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    model.load_state_dict(torch.load(file_model, map_location=device))
    loss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    classifier = PyTorchClassifier(model=model,
                                   loss=loss,
                                   input_shape=n_features,
                                   optimizer=optimizer,
                                   nb_classes=n_classes,
                                   clip_values=(0.0, 1.0),
                                   device_type='gpu')

    if att_name == 'apgd':
        eps_step = eps / 4. if eps <= 0.2 else 0.1
        attack = AutoProjectedGradientDescent(estimator=classifier,
                                              eps=eps,
                                              eps_step=eps_step,
                                              max_iter=1000,
                                              batch_size=BATCH_SIZE,
                                              targeted=False)
    elif att_name == 'apgd2':
        attack = AutoProjectedGradientDescent(estimator=classifier,
                                              norm=2,
                                              eps=eps,
                                              eps_step=0.1,
                                              max_iter=1000,
                                              batch_size=BATCH_SIZE,
                                              targeted=False)
    elif att_name == 'cw2':
        # Do not increase the batch_size
        attack = CarliniWagnerAttackL2(model=model,
                                       n_classes=n_classes,
                                       confidence=eps,
                                       verbose=True,
                                       check_prob=False,
                                       batch_size=32,
                                       targeted=False)
    elif att_name == 'deepfool':
        # Do not adjust Epsilon
        attack = DeepFool(classifier=classifier, batch_size=BATCH_SIZE)
    elif att_name == 'fgsm':
        attack = FastGradientMethod(estimator=classifier,
                                    eps=eps,
                                    batch_size=BATCH_SIZE)
    elif att_name == 'line':
        attack = LineAttack(color=1, thickness=2)
    else:
        raise NotImplementedError

    time_start = time.time()
    adv = attack.generate(x=X)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    obj = {'X': X, 'y': y, 'adv': adv}
    torch.save(obj, file_data)
    print('Save data to:', file_data)

    return adv, X, y
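
A hypothetical invocation of the helper above; the checkpoint path is a placeholder that follows the '{data}_{model}_...' naming scheme the function parses:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
adv, X_out, y_out = run_attack_untargeted('results/mnist_basic_model.pt',
                                          X, y,
                                          att_name='apgd',
                                          eps=0.3,
                                          device=device)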
Example #24
def attack_universal_perturbations_nontargeted(dataloader, model, model_info,
                                               args, checkpoint_dir, norm,
                                               eps):
    """
    UAP nontargeted attack 
    """
    device = args.device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    img_size = model_info["model_img_size"]
    n_classes = model_info["num_classes"]

    classifier = PyTorchClassifier(
        model=model,
        loss=criterion,
        clip_values=(0.0, 1.0),
        optimizer=optimizer,
        input_shape=(img_size, img_size),
        nb_classes=n_classes,
        device_type=device,
    )

    attack = UniversalPerturbation(
        classifier=classifier,
        attacker="fgsm",
        attacker_params={
            "eps": eps,
            "batch_size": 32,
            "norm": norm
        },
        delta=0.25,
        max_iter=20,
        eps=eps,
        norm=norm,
    )

    # Launch a non-targeted attack
    print("Launching non-targeted universal perturbation attack")
    dest_images = checkpoint_dir
    os.makedirs(dest_images, exist_ok=True)

    # Running over the entire-batch to compute a universal perturbation
    for data in tqdm(dataloader):
        sample, label, img_path = data
        sample = sample.float()
        # Launch attack
        sample_adv = attack.generate(x=sample.cpu())

        # Code to save these images
        img_path = [it.split("/")[-1] for it in img_path]
        for i in range(len(sample_adv)):
            _img = sample_adv[i].transpose(1, 2, 0)
            skimage.io.imsave(os.path.join(dest_images, img_path[i]),
                              img_as_ubyte(_img))

        # Also save noise image for universal attack
        _img = attack.noise.squeeze(0).transpose(1, 2, 0)
        skimage.io.imsave(os.path.join(dest_images, "noise.png"),
                          img_as_ubyte(_img))

    with open(os.path.join(dest_images, "stats.txt"), "w") as f:
        f.write(f"Fooling-rate was {attack.fooling_rate}\n")

    return dest_images
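
A sketch of how this helper might be driven; the model_info keys match the reads above, and the concrete values are illustrative assumptions:

model_info = {"model_img_size": 224, "num_classes": 10}
dest = attack_universal_perturbations_nontargeted(dataloader, model, model_info,
                                                  args, checkpoint_dir='out/uap',
                                                  norm=np.inf, eps=10.0 / 255.0)
print('Adversarial images written to:', dest)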
Example #25
    data_dir = config['data_dir']

    # Set up GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpu_id'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Set up model
    model = get_resnet18().to(device)
    model.eval()

    test_loader = get_test_loader(data_dir=data_dir, batch_size=50)
    classifier = PyTorchClassifier(
        model=model,
        loss=nn.CrossEntropyLoss(),
        input_shape=(3, 32, 32),
        nb_classes=10,
        optimizer=None,
        clip_values=(0, 1),
    )

    attack = ProjectedGradientDescentPyTorch(
        estimator=classifier,
        norm=np.inf,
        eps=config['epsilon'],
        eps_step=config['step_size'],
        max_iter=config['num_steps'],
        num_random_init=config['num_random_init'],
        batch_size=50,
    )
        
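
To make the snippet end-to-end, a minimal evaluation sketch, assuming test_loader yields (x, y) tensor batches on the CPU:

# Generate adversarial examples batch by batch and track robust accuracy.
correct, total = 0, 0
for x, y in test_loader:
    x_adv = attack.generate(x=x.numpy())
    preds = np.argmax(classifier.predict(x_adv), axis=1)
    correct += np.sum(preds == y.numpy())
    total += len(y)
print('Robust accuracy: {:.2f}%'.format(100.0 * correct / total))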
Example #26
test_set_x = dataset[2][0]
test_set_y = dataset[2][1]

data, target = torch.from_numpy(test_set_x), torch.from_numpy(test_set_y)
data = torch.reshape(data, [-1, 1, 57, 47])

print("The size of the input is:")
print(data.shape)

data, target = data.to(device), target.to(device)
"""
    White-Box Classifier
"""
classifier = PyTorchClassifier(model=model,
                               input_shape=(1, 57, 47),
                               nb_classes=40,
                               loss=nn.CrossEntropyLoss(),
                               device_type="cpu")

original_predictions = classifier.predict(data.cpu().numpy())

accuracy = np.sum(
    np.argmax(original_predictions, axis=1) == np.argmax(
        test_set_y, axis=1)) / test_set_y.shape[0]
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Generate adversarial test examples
"""
    White-Box Attacks
"""
# FGSM
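
The snippet cuts off at the FGSM marker; a plausible continuation, with eps as an illustrative assumption:

from art.attacks.evasion import FastGradientMethod

attack_fgsm = FastGradientMethod(estimator=classifier, eps=0.1)
x_adv = attack_fgsm.generate(x=data.cpu().numpy())
adv_predictions = classifier.predict(x_adv)
adv_accuracy = np.sum(
    np.argmax(adv_predictions, axis=1) == np.argmax(
        test_set_y, axis=1)) / test_set_y.shape[0]
print("Accuracy on FGSM adversarial examples: {}%".format(adv_accuracy * 100))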
Example #27
    trojanvision.datasets.add_argument(parser)
    trojanvision.models.add_argument(parser)
    args = parser.parse_args()
    env = trojanvision.environ.create(**args.__dict__)
    dataset = trojanvision.datasets.create(**args.__dict__)
    model = trojanvision.models.create(dataset=dataset, **args.__dict__)
    if env['verbose']:
        summary(env=env, dataset=dataset, model=model)
    import torch
    import numpy as np
    from sklearn import metrics
    from trojanzoo.utils.data import dataset_to_list
    from art.estimators.classification import PyTorchClassifier  # type: ignore
    classifier = PyTorchClassifier(
        model=model._model,
        loss=model.criterion,
        input_shape=dataset.data_shape,
        nb_classes=model.num_classes,
    )
    model._validate()

    from art.attacks.inference.membership_inference import LabelOnlyDecisionBoundary as Attack  # type: ignore

    attack = Attack(classifier)
    x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
    x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
    x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
    x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

    sample_size = 64

    tau_path = os.path.normpath(os.path.join(model.folder_path,
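
The example is truncated above. For orientation, a typical continuation with ART's LabelOnlyDecisionBoundary calibrates the distance threshold on member/non-member subsets of size sample_size and then infers membership (the exact calibration call is an assumption based on ART's API):

    attack.calibrate_distance_threshold(x_train[:sample_size], y_train[:sample_size],
                                        x_valid[:sample_size], y_valid[:sample_size])
    inferred_train = attack.infer(x_train[:sample_size], y_train[:sample_size])
    inferred_valid = attack.infer(x_valid[:sample_size], y_valid[:sample_size])
    y_true = np.concatenate([np.ones(sample_size), np.zeros(sample_size)])
    y_score = np.concatenate([inferred_train, inferred_valid])
    print('membership inference accuracy:', metrics.accuracy_score(y_true, y_score))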
Example #28
# Step 2: create an interface for classifier, load trained model, to be used by attack model trainer

# Define the loss function and the optimizer for attack model trainer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(base_classifier.parameters(),
                      lr=0.1,
                      momentum=0.9,
                      weight_decay=1e-4)

# Create the ART classifier

classifier = PyTorchClassifier(
    model=base_classifier,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
)

# Step 3: Train the ART classifier

# TODO: add option to train on demand

# Step 4: Evaluate the ART classifier on benign test examples

y_test = y_test[:args.max]  # limit the length of test set
trans = transforms.ToTensor()  # convert ndarray into a tensor, scaling pixel values to [0, 1]
x_data = []  # list for test data
predictions = []  # list for prediction
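
The example stops while setting up its buffers; one way it might continue, assuming x_test holds raw test images aligned with the one-hot y_test:

# Convert each raw image to a tensor, batch them, and predict.
for img in x_test:
    x_data.append(trans(img))
x_batch = torch.stack(x_data).numpy()
predictions = classifier.predict(x_batch)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))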
Example #29
def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    parser.add_argument('--eps', type=float, default=0.3)
    # NOTE: In CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)
    
    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)
        
    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])

    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32), torch.from_numpy(y_test).type(torch.long))

    dataloader_train = DataLoader(dataset_train, 256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, 256, shuffle=False)

    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features,
            n_hidden=n_features * 4,
            n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    optimizer = optim.SGD(model.parameters(), lr=0.01,
                          momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains correctly classified samples.
    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(
        len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    if isinstance(n_features, int):
        n_features = (n_features,)

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu' if torch.cuda.is_available() else 'cpu')

    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd1':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=1,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'bim':
        eps_step = args.eps / 10.0
        attack = BasicIterativeMethod(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'boundary':
        attack = BoundaryAttack(
            estimator=classifier,
            max_iter=1000,
            sample_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        attack = CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=args.eps,
            verbose=True,
            check_prob=False,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cwinf':
        attack = CarliniLInfMethod(
            classifier=classifier,
            confidence=args.eps,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'deepfool':
        attack = DeepFool(
            classifier=classifier,
            epsilon=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'fgsm':
        attack = FastGradientMethod(
            estimator=classifier,
            eps=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'jsma':
        attack = SaliencyMapMethod(
            classifier=classifier,
            gamma=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'line':
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        attack = LineAttack(color=color, thickness=1)
    elif args.attack == 'shadow':
        attack = ShadowAttack(
            estimator=classifier,
            batch_size=args.batch_size,
            targeted=False,
            verbose=False)
    elif args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=data_params['data'][args.data]['n_classes'],
            x_min=0.0,
            x_max=1.0,
            targeted=False)

        X_train, y_train = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
    else:
        raise NotImplementedError

    if len(dataset_perfect) > args.n_samples:
        n = args.n_samples
    else:
        n = len(dataset_perfect)

    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()

    print('Creating {} adversarial examples with eps={} (Not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    # Shadow attack only takes a single sample at a time!
    if args.attack == 'shadow':
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
    elif args.attack == 'watermark':
        # This is untargeted.
        adv = attack.generate(X_benign, y)
    else:
        adv = attack.generate(x=X_benign)
    time_elapsed = time.time() - time_start
    print('Total time spent: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print("Accuracy on benign samples: {:.4f}%".format(acc_benign * 100))
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    # Save results
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)

    print('Saved to:', '{}_adv.npy'.format(output_file))
    print()
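
A hypothetical way to drive main(); the script name and checkpoint are placeholders, and the flags mirror the argparse definitions above:

import sys

sys.argv = ['attack.py', '--data', 'mnist',
            '--pretrained', 'mnist_basic.pth',
            '--attack', 'apgd', '--eps', '0.3']
main()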
Example #30
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    model.train()
    print('epoch ' + str(epoch))

    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()
    toPilImage = transforms.ToPILImage()  # transform tensor into PIL image to save

    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)

        # gauss noise training
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd

        # targeted noise training
        tmp_criterion = nn.CrossEntropyLoss()
        tmp_optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
        classifier = PyTorchClassifier(
            model=model,
            clip_values=(min_pixel_value, max_pixel_value),
            loss=tmp_criterion,
            optimizer=tmp_optimizer,
            input_shape=(3, 32, 32),
            nb_classes=10,
        )
        # generate random targets
        targets = art.utils.random_targets(y.cpu().numpy(), get_num_classes())

        # calculate loss gradient
        grad = classifier.loss_gradient(x=x.cpu().numpy(), y=targets) * (-1.0)
        scaled_grad = torch.Tensor(grad * args.eps_step).to(device)


        # combine noise and targeted noise
        x_combine = x + (gauss_noise *
                         (1.0 - args.k_value)) + (scaled_grad * args.k_value)

        model.zero_grad()

        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += acc

    scheduler.step()
    end = time.time()
    print('training time:', end - start, 'sec, loss:', train_loss / total,
          'acc:', train_acc / total)
    return train_loss / total, train_acc / total
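
A minimal driver loop for the function above, assuming the usual training objects are constructed elsewhere (all names are illustrative):

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(args.epochs):
    train_loss, train_acc = train(train_loader, model, criterion,
                                  optimizer, scheduler, epoch)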