示例#1
0
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership in the training set of the target estimator.

        A record is inferred to be a member (1) when the class predicted by the estimator matches the true label,
        and a non-member (0) otherwise.

        :param x: Input records to attack.
        :param y: True labels for `x`.
        :param probabilities: a boolean indicating whether to return the predicted probabilities per class, or just
                              the predicted class.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member,
                 or class probabilities.
        :raises ValueError: If `y` is missing, if the shapes of `x`, `y` and the estimator input do not match, or if
                            the label transformation yields `None`.
        """
        if y is None:  # pragma: no cover
            raise ValueError(
                "MembershipInferenceBlackBoxRuleBased requires true labels `y`."
            )

        if self.estimator.input_shape is not None:  # pragma: no cover
            if self.estimator.input_shape[0] != x.shape[1]:
                raise ValueError(
                    "Shape of x does not match input_shape of classifier")

        probabilities = kwargs.get("probabilities", False)

        y = check_and_transform_label_format(y,
                                             len(np.unique(y)),
                                             return_one_hot=True)
        if y is None:
            raise ValueError("None value detected.")
        if y.shape[0] != x.shape[0]:  # pragma: no cover
            raise ValueError("Number of rows in x and y do not match")

        # get model's predictions for x
        y_pred = self.estimator.predict(x=x)
        predicted_class = (np.argmax(y, axis=1) == np.argmax(
            y_pred, axis=1)).astype(int)
        if not probabilities:
            return predicted_class

        if y_pred.shape[1] == 2:
            # Binary classification: use the estimator's confidence as the probability of the inferred status.
            pred_prob = np.max(y_pred, axis=1)
            rows = np.arange(predicted_class.shape[0])
            prob = np.zeros((predicted_class.shape[0], 2))
            # Index row by row. `prob[:, predicted_class]` would select whole columns and broadcast the same
            # values into every row instead of writing one probability per record.
            prob[rows, predicted_class] = pred_prob
            prob[rows, 1 - predicted_class] = 1.0 - pred_prob
            return prob

        # Otherwise simply return probability 1 for the inferred status and 0 for the other one. The number of
        # classes is fixed to 2 so both columns exist even when every record gets the same status.
        prob = check_and_transform_label_format(predicted_class,
                                                nb_classes=2,
                                                return_one_hot=True)
        if prob is None:
            raise ValueError("None value detected.")
        return prob
示例#2
0
    def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
        """
        Fit the attack model on the remaining features, the estimator's predictions and, optionally, the labels.

        The attacked feature is removed from `x` and used (encoded) as the training target of the attack model.

        :param x: Input to training process. Includes all features used to train the original model.
        :param y: True labels for x.
        :raises ValueError: If `x` does not match the estimator's input shape or `attack_feature` is an index
                            outside the columns of `x`.
        """
        # Validate the input against the target estimator and the attacked feature.
        expected_shape = self.estimator.input_shape
        if expected_shape is not None and expected_shape[0] != x.shape[1]:
            raise ValueError("Shape of x does not match input_shape of model")

        single_index = isinstance(self.attack_feature, int)
        if single_index and self.attack_feature >= x.shape[1]:
            raise ValueError(
                "`attack_feature` must be a valid index to a feature in x")

        # Query the target estimator for its predictions on x.
        if ClassifierMixin in type(self.estimator).__mro__:
            class_ids = [
                np.argmax(scores) for scores in self.estimator.predict(x)
            ]
            predictions = np.array(class_ids).reshape(-1, 1)
        elif self.scale_range is not None:
            # Regression model, predictions (and labels) rescaled to the configured range.
            raw_predictions = self.estimator.predict(x).reshape(-1, 1)
            predictions = minmax_scale(raw_predictions,
                                       feature_range=self.scale_range)
            if y is not None:
                y = minmax_scale(y, feature_range=self.scale_range)
        else:
            # Regression model, predictions (and labels) multiplied by the normalisation factor.
            predictions = self.estimator.predict(x).reshape(
                -1, 1) * self.prediction_normal_factor
            if y is not None:
                y = y * self.prediction_normal_factor

        # Extract and encode the attacked feature; it becomes the attack model's target.
        attacked_column = x[:, self.attack_feature]
        self._values = get_feature_values(attacked_column, single_index)
        encode = float_to_categorical if single_index else floats_to_one_hot
        attack_targets = check_and_transform_label_format(
            encode(attacked_column),
            len(np.unique(attacked_column)),
            return_one_hot=True)

        # Assemble the attack model's training inputs.
        remaining_features = np.delete(x, self.attack_feature, 1)
        x_train = np.concatenate((remaining_features, predictions),
                                 axis=1).astype(np.float32)
        if y is not None:
            y = check_and_transform_label_format(y, return_one_hot=True)
            x_train = np.concatenate((x_train, y), axis=1)

        self.attack_model.fit(x_train, attack_targets)
示例#3
0
    def test_check_and_transform_label_format(self):
        """
        Check `check_and_transform_label_format` on index and one-hot label inputs, for both output formats, and
        check that labels with more than two dimensions are rejected.
        """
        one_hot_five = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
                                 [0, 0, 0, 0, 1]])
        one_hot_binary = np.array([[0, 1], [1, 0], [0, 1]])
        index_five = np.expand_dims(np.argmax(one_hot_five, axis=1), axis=1)
        index_binary = np.expand_dims(np.argmax(one_hot_binary, axis=1),
                                      axis=1)

        # Each case: (input labels, nb_classes, return_one_hot, expected output)
        cases = [
            # input shape (nb_samples,)
            (np.array([3, 1, 4]), 5, True, one_hot_five),
            # input shape (nb_samples, 1)
            (np.array([[3], [1], [4]]), 5, True, one_hot_five),
            # input shape (nb_samples, 1) - binary
            (np.array([[1], [0], [1]]), 2, True, one_hot_binary),
            # input shape (nb_samples, 2) - binary, already one-hot
            (np.array([[0, 1], [1, 0], [0, 1]]), 2, True, one_hot_binary),
            # input shape (nb_samples, nb_classes)
            (np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]]),
             5, True, one_hot_five),
            # input shape (nb_samples, nb_classes) with return_one_hot=False
            (np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]]),
             5, False, index_five),
            # input shape (nb_samples, 1) - binary with return_one_hot=False
            (np.array([[1], [0], [1]]), 2, False, index_binary),
        ]
        for labels, nb_classes, one_hot, expected in cases:
            transformed = check_and_transform_label_format(
                labels, nb_classes=nb_classes, return_one_hot=one_hot)
            np.testing.assert_array_equal(transformed, expected)

        # ValueError for len(labels.shape) > 2
        rank_three_labels = np.array([[[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
                                       [0, 0, 0, 0, 1]]])
        with self.assertRaises(ValueError):
            check_and_transform_label_format(rank_three_labels)
示例#4
0
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership in the training set of the target estimator.

        A record is inferred to be a member when the class predicted by the estimator equals its true label.

        :param x: Input records to attack.
        :param y: True labels for `x`.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
        :raises ValueError: If `y` is missing or the shapes of `x`, `y` and the estimator input do not match.
        """
        if y is None:
            raise ValueError(
                "MembershipInferenceBlackBoxRuleBased requires true labels `y`."
            )

        # Estimators may not declare an input shape; only validate when one is available instead of failing
        # with a TypeError on `None[0]`.
        input_shape = self.estimator.input_shape
        if input_shape is not None and input_shape[0] != x.shape[1]:
            raise ValueError(
                "Shape of x does not match input_shape of classifier")

        y = check_and_transform_label_format(y,
                                             len(np.unique(y)),
                                             return_one_hot=True)
        if y.shape[0] != x.shape[0]:
            raise ValueError("Number of rows in x and y do not match")

        # Compare true and predicted class indices in one vectorised step.
        # NOTE(review): assumes `predict` returns a 2-D (nb_samples, nb_classes) array - confirm.
        true_class = np.argmax(y, axis=1)
        predicted_class = np.argmax(self.estimator.predict(x), axis=1)
        return (predicted_class == true_class).astype(int)
    def fit(self, x: np.ndarray) -> None:
        """
        Train the attack model.

        The attack model learns to predict the attacked feature from the remaining features of `x` together with
        the class predicted by the target estimator.

        :param x: Input to training process. Includes all features used to train the original model.
        :raises ValueError: If `x` does not match the estimator's input shape or `attack_feature` is not a valid
                            column index of `x`.
        """
        # Checks. Estimators may not declare an input shape; only validate when one is available instead of
        # failing with a TypeError on `None[0]`.
        input_shape = self.estimator.input_shape
        if input_shape is not None and input_shape[0] != x.shape[1]:
            raise ValueError(
                "Shape of x does not match input_shape of classifier")
        if self.attack_feature >= x.shape[1]:
            raise ValueError(
                "attack_feature must be a valid index to a feature in x")

        # get model's predictions for x as a column of class indices
        # NOTE(review): assumes `predict` returns a 2-D (nb_samples, nb_classes) array - confirm.
        predictions = np.argmax(self.estimator.predict(x),
                                axis=1).reshape(-1, 1)

        # get vector of attacked feature and encode it as the attack model's target
        y = x[:, self.attack_feature]
        y_one_hot = float_to_categorical(y)
        y_ready = check_and_transform_label_format(y_one_hot,
                                                   len(np.unique(y)),
                                                   return_one_hot=True)

        # create training set for attack model: all other features plus the predictions
        x_train = np.concatenate(
            (np.delete(x, self.attack_feature, 1), predictions),
            axis=1).astype(np.float32)

        # train attack model
        self.attack_model.fit(x_train, y_ready)
示例#6
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Craft adversarial samples batch by batch and return them stacked in one array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        :raises ValueError: If the attack is targeted and no labels are given, or for a binary classifier with a
                            single output.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        if y is None:
            # A targeted attack cannot proceed without target labels.
            if self.targeted:
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack.")

            # Untargeted: fall back to the model's own predictions as labels.
            model_output = self.estimator.predict(x, batch_size=self.batch_size)
            y = get_labels_np_array(model_output)

        if self.estimator.nb_classes == 2 and y.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Attack consecutive slices of size `batch_size` and collect the results.
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        adv_batches = []
        for batch_id in trange(nb_batches,
                               desc="ZOO",
                               disable=not self.verbose):
            start = batch_id * self.batch_size
            stop = (batch_id + 1) * self.batch_size
            adv_batches.append(
                self._generate_batch(x[start:stop], y[start:stop]))
        x_adv = np.vstack(adv_batches)

        # Clip in place to the estimator's value range when one is defined.
        clip_values = self.estimator.clip_values
        if clip_values is not None:
            clip_min, clip_max = clip_values
            np.clip(x_adv, clip_min, clip_max, out=x_adv)

        # Report how often the attack succeeded.
        success_rate = compute_success(self.estimator,
                                       x,
                                       y,
                                       x_adv,
                                       self.targeted,
                                       batch_size=self.batch_size)
        logger.info("Success rate of ZOO attack: %.2f%%", 100 * success_rate)

        return x_adv
示例#7
0
    def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: int = 10, **kwargs) -> None:
        """
        Train the classifier on `(x, y)` by calling the configured training step on shuffled mini-batches.

        :param x: Training data.
        :param y: Labels, one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
                  shape (nb_samples,).
        :param batch_size: Size of batches.
        :param nb_epochs: Number of epochs to use for training.
        :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for
               TensorFlow and providing it takes no effect.
        :raises TypeError: If no training step function has been defined.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        if self._train_step is None:
            raise TypeError(
                "The training function `train_step` is required for fitting a model but it has not been " "defined."
            )

        labels_formatted = check_and_transform_label_format(y, self.nb_classes)

        # Run the preprocessing pipeline on inputs and labels.
        x_preprocessed, y_preprocessed = self._apply_preprocessing(x, labels_formatted, fit=True)

        # Collapse labels to class indices when the estimator is configured to reduce them.
        if self._reduce_labels:
            y_preprocessed = np.argmax(y_preprocessed, axis=1)

        samples = tf.data.Dataset.from_tensor_slices((x_preprocessed, y_preprocessed))
        train_ds = samples.shuffle(10000).batch(batch_size)

        for _ in range(nb_epochs):
            for batch_inputs, batch_labels in train_ds:
                self._train_step(self.model, batch_inputs, batch_labels)
示例#8
0
    def fit(self,
            x: np.ndarray,
            y: np.ndarray,
            batch_size: int = 128,
            nb_epochs: int = 20,
            **kwargs) -> None:
        """
        Train the classifier on `(x, y)` through the wrapped model's `fit_generator`.

        :param x: Training data.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
                  shape (nb_samples,).
        :param batch_size: Size of batches.
        :param nb_epochs: Number of epochs to use for training.
        :param kwargs: Dictionary of framework-specific arguments. These should be parameters supported by the
               `fit_generator` function in Keras and will be passed to this function as such. Including the number of
               epochs or the number of steps per epoch as part of this argument will result in an error.
        """
        labels_formatted = check_and_transform_label_format(y, self.nb_classes)

        # Run the preprocessing pipeline on inputs and labels.
        x_preprocessed, y_preprocessed = self._apply_preprocessing(
            x, labels_formatted, fit=True)

        # Loss functions that do not take one-hot labels get class indices instead.
        if self._reduce_labels:
            y_preprocessed = np.argmax(y_preprocessed, axis=1)

        batch_generator = generator_fit(x_preprocessed, y_preprocessed,
                                        batch_size)

        # At least one step per epoch, even when there are fewer samples than one batch.
        steps_per_epoch = int(x_preprocessed.shape[0] / batch_size)
        if steps_per_epoch < 1:
            steps_per_epoch = 1

        self._model.fit_generator(batch_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=nb_epochs,
                                  **kwargs)
示例#9
0
    def _set_targets(self, x: np.ndarray, y: np.ndarray, classifier_mixin: bool = True) -> np.ndarray:
        """
        Check and set up targets.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param classifier_mixin: Whether the estimator is of type `ClassifierMixin`.
        :return: The targets.
        """
        if classifier_mixin:
            y = check_and_transform_label_format(y, self.estimator.nb_classes)

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:  # pragma: no cover
                raise ValueError("Target labels `y` need to be provided for a targeted attack.")

            # Use model predictions as correct outputs
            if classifier_mixin:
                targets = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))
            else:
                targets = self.estimator.predict(x, batch_size=self.batch_size)

        else:
            targets = y

        return targets
    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """
        Fit the attack model to predict the attacked feature from the remaining features and the scaled labels.

        :param x: Input to training process. Includes all features used to train the original model.
        :param y: True labels of the features.
        :raises ValueError: If a single-index `attack_feature` lies outside the columns of `x`.
        """
        # Validate the attacked feature index.
        if self.single_index_feature and self.attack_feature >= x.shape[1]:
            raise ValueError(
                "attack_feature must be a valid index to a feature in x")

        # Extract the attacked feature and encode it as the attack model's target.
        feature_column = x[:, self.attack_feature]
        if self.single_index_feature:
            encoded_feature = float_to_categorical(feature_column)
        else:
            encoded_feature = floats_to_one_hot(feature_column)
        attack_targets = check_and_transform_label_format(
            encoded_feature,
            len(np.unique(feature_column)),
            return_one_hot=True)

        # Training inputs: every other feature followed by the labels scaled by the normalisation factor.
        scaled_labels = y * self.prediction_normal_factor
        other_features = np.delete(x, self.attack_feature, 1)
        x_train = np.concatenate((other_features, scaled_labels),
                                 axis=1).astype(np.float32)

        self.attack_model.fit(x_train, attack_targets)
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership in the training set of the target estimator.

        A record is inferred to be a member when the class predicted by the estimator equals its true label.

        :param x: Input records to attack.
        :param y: True labels for `x`.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
        :raises ValueError: If `y` is missing or the shapes of `x`, `y` and the estimator input do not match.
        """
        if y is None:
            raise ValueError(
                "MembershipInferenceBlackBoxRuleBased requires true labels `y`."
            )

        if self.estimator.input_shape is not None:
            if self.estimator.input_shape[0] != x.shape[1]:
                raise ValueError(
                    "Shape of x does not match input_shape of classifier")

        y = check_and_transform_label_format(y,
                                             len(np.unique(y)),
                                             return_one_hot=True)
        if y.shape[0] != x.shape[0]:
            raise ValueError("Number of rows in x and y do not match")

        # get model's predictions for x
        y_pred = self.estimator.predict(x=x)
        # Use the builtin `int`: the `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24.
        return (np.argmax(y, axis=1) == np.argmax(y_pred,
                                                  axis=1)).astype(int)
示例#12
0
    def fit(self, x: np.ndarray) -> None:
        """
        Fit the attack model to predict the attacked feature from the remaining features of `x`.

        :param x: Input to training process. Includes all features used to train the original model.
        :raises ValueError: If an integer `attack_feature` lies outside the columns of `x`, or the label
                            transformation yields `None`.
        """
        # Validate the attacked feature index.
        single_index = isinstance(self.attack_feature, int)
        if single_index and self.attack_feature >= x.shape[1]:
            raise ValueError(
                "attack_feature must be a valid index to a feature in x")

        # Extract the attacked feature, remember its values and encode it as the training target.
        feature_column = x[:, self.attack_feature]
        self._values = get_feature_values(feature_column, single_index)
        if single_index:
            encoded_feature = float_to_categorical(feature_column)
        else:
            encoded_feature = floats_to_one_hot(feature_column)
        attack_targets = check_and_transform_label_format(
            encoded_feature,
            len(np.unique(feature_column)),
            return_one_hot=True)
        if attack_targets is None:
            raise ValueError("None value detected.")

        # Training inputs are all features except the attacked one.
        other_features = np.delete(x, self.attack_feature,
                                   1).astype(np.float32)

        self.attack_model.fit(other_features, attack_targets)
示例#13
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Craft adversarial samples batch by batch and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :return: An array holding the adversarial examples.
        :raises ValueError: If the attack is targeted and no labels are given, or for a binary classifier with a
                            single output.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.astype(ART_NUMPY_DTYPE)

        if y is None:
            # A targeted attack cannot proceed without target labels.
            if self.targeted:
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack.")

            # Untargeted: fall back to the model's own predictions as labels.
            model_output = self.estimator.predict(x, batch_size=self.batch_size)
            y = get_labels_np_array(model_output)

        if self.estimator.nb_classes == 2 and y.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Attack consecutive slices of size `batch_size`, writing the results back in place.
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches,
                               desc="EAD",
                               disable=not self.verbose):
            start = batch_id * self.batch_size
            stop = (batch_id + 1) * self.batch_size
            x_adv[start:stop] = self._generate_batch(x_adv[start:stop],
                                                     y[start:stop])

        # Clip to the estimator's value range when one is defined.
        clip_values = self.estimator.clip_values
        if clip_values is not None:
            x_adv = np.clip(x_adv, clip_values[0], clip_values[1])

        # Report how often the attack succeeded.
        success_rate = compute_success(self.estimator,
                                       x,
                                       y,
                                       x_adv,
                                       self.targeted,
                                       batch_size=self.batch_size)
        logger.info("Success rate of EAD attack: %.2f%%", 100 * success_rate)

        return x_adv
示例#14
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """
        Optimise the adversarial patch on `(x, y)` and return the patch together with its mask.

        :param x: An array with the original input images of shape NHWC or input videos of shape NFHWC.
        :param y: An array with the original true labels.
        :param shuffle: Optional keyword argument; the data is shuffled unless it is passed as a falsy value.
        :return: An array with adversarial patch and an array of the patch mask.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        y = check_and_transform_label_format(
            labels=y, nb_classes=self.estimator.nb_classes)

        # Build the input pipeline; shuffling is the default and can be disabled via `shuffle`.
        dataset = tf.data.Dataset.from_tensor_slices((x, y))
        if kwargs.get("shuffle", True):
            dataset = dataset.shuffle(10000)
        nb_repeats = math.ceil(x.shape[0] / self.batch_size)
        dataset = dataset.batch(self.batch_size).repeat(nb_repeats)

        for _ in trange(self.max_iter, desc="Adversarial Patch TensorFlow v2"):
            for images, target in dataset:
                self._train_step(images=images, target=target)

        patch = self._patch.numpy()
        mask = self._get_circular_patch_mask(nb_samples=1).numpy()[0]
        return (patch, mask)
示例#15
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Craft adversarial samples batch by batch and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param cost_matrix: A non-negative cost matrix.
        :type cost_matrix: `np.ndarray`
        :return: An array holding the adversarial examples.
        :raises ValueError: If the attack is targeted and no labels are given, or for a binary classifier with a
                            single output.
        """
        if y is not None:
            y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.copy().astype(ART_NUMPY_DTYPE)

        if y is not None:
            targets = y
        elif self.targeted:
            # A targeted attack cannot fall back to model predictions.
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack."
            )
        else:
            # Use the model's predictions as the correct outputs.
            model_output = self.estimator.predict(x, batch_size=self.batch_size)
            targets = get_labels_np_array(model_output)

        if self.estimator.nb_classes == 2 and targets.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Take the caller's cost matrix, or compute one when none was passed.
        cost_matrix = kwargs.get("cost_matrix")
        if cost_matrix is None:
            cost_matrix = self._compute_cost_matrix(self.p, self.kernel_size)

        # Perturb consecutive slices of size `batch_size`, writing the results back in place.
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches,
                               desc="Wasserstein",
                               disable=not self.verbose):
            logger.debug("Processing batch %i out of %i", batch_id, nb_batches)

            start = batch_id * self.batch_size
            stop = (batch_id + 1) * self.batch_size
            x_adv[start:stop] = self._generate_batch(x_adv[start:stop],
                                                     targets[start:stop],
                                                     cost_matrix)

        return x_adv
示例#16
0
    def generate(self, x, y=None, **kwargs):
        """
        Perturb every input sample individually and return the adversarial samples in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,).
        :type y: `np.ndarray`
        :param x_adv_init: Initial array to act as initial adversarial examples. Same shape as `x`.
        :type x_adv_init: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        # Value range: taken from the classifier when it defines one, otherwise inferred from the data.
        clip_values = getattr(self.classifier, 'clip_values', None)
        if clip_values is None:
            clip_min, clip_max = np.min(x), np.max(x)
        else:
            clip_min, clip_max = clip_values

        # Classes predicted for the original inputs.
        preds = np.argmax(self.classifier.predict(x, batch_size=self.batch_size), axis=1)

        # Classes predicted for the initial adversarial examples, when the caller supplied any.
        x_adv_init = kwargs.get('x_adv_init')
        if x_adv_init is None:
            init_preds = [None] * len(x)
            x_adv_init = [None] * len(x)
        else:
            init_preds = np.argmax(self.classifier.predict(x_adv_init, batch_size=self.batch_size), axis=1)

        # A targeted attack cannot proceed without target labels.
        if self.targeted and y is None:
            raise ValueError('Target labels `y` need to be provided for a targeted attack.')

        x_adv = x.astype(NUMPY_DTYPE)
        if y is not None:
            y = np.argmax(y, axis=1)

        # Perturb the samples one by one; untargeted attacks pass -1 as the target.
        for ind, val in enumerate(x_adv):
            target = y[ind] if self.targeted else -1
            x_adv[ind] = self._perturb(x=val, y=target, y_p=preds[ind], init_pred=init_preds[ind],
                                       adv_init=x_adv_init[ind], clip_min=clip_min, clip_max=clip_max)

        # Convert the label indices back to categorical form for the success computation.
        if y is not None:
            y = to_categorical(y, self.classifier.nb_classes())

        success_rate = compute_success(self.classifier, x, y, x_adv, self.targeted, batch_size=self.batch_size)
        logger.info('Success rate of HopSkipJump attack: %.2f%%', 100 * success_rate)

        return x_adv
示例#17
0
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Craft adversarial examples against a decision tree estimator.

        For every sample the decision path is walked upwards; at each ancestor the sibling subtree is searched
        (via `_df_subtree`) for a path to a suitable leaf. The features tested along the resulting path are then
        moved just across their thresholds by `self.offset`.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes, return_one_hot=False)
        x_adv = x.copy()
        tree = self.estimator

        for index in trange(x_adv.shape[0], desc="Decision tree attack", disable=not self.verbose):
            row = x_adv[index]  # view into x_adv: writes below modify the adversarial array in place
            path = tree.get_decision_path(row)
            legitimate_class = np.argmax(tree.predict(row.reshape(1, -1)))

            position = -2
            adv_path = [-1]
            ancestor = path[position]
            # Keep climbing while the top of the path has not been reached, or no adversarial path was found yet
            while abs(position) < (len(path) - 1) or adv_path[0] == -1:
                ancestor = path[position]
                current_child = path[position + 1]

                # Search the subtree on the opposite side of the branch the sample took
                if current_child == tree.get_left_child(ancestor):
                    sibling = tree.get_right_child(ancestor)
                else:
                    sibling = tree.get_left_child(ancestor)

                if y is None:
                    adv_path = self._df_subtree(sibling, legitimate_class)
                else:
                    adv_path = self._df_subtree(sibling, legitimate_class, y[index])

                position -= 1  # one step further up the decision path
            adv_path.append(ancestor)

            # adv_path runs from the leaf upwards; the leaf has no threshold, so pair each node with the
            # entry preceding it (the child the sample has to be routed to).
            for go_for, node in zip(adv_path, adv_path[1:]):
                threshold = tree.get_threshold_at_node(node)
                feature = tree.get_feature_at_node(node)
                value = row[feature]
                # Only perturb when the feature currently routes the sample to the wrong child
                if value > threshold and go_for == tree.get_left_child(node):
                    row[feature] = threshold - self.offset
                elif value <= threshold and go_for == tree.get_right_child(node):
                    row[feature] = threshold + self.offset

        success_rate = 100 * compute_success(self.estimator, x, y, x_adv)
        logger.info("Success rate of decision tree attack: %.2f%%", success_rate)
        return x_adv
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership in the training set of the target estimator.

        :param x: Input records to attack.
        :param y: True labels for `x`.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
        :raises ValueError: If `y` is missing, if `x` does not match the estimator's input shape, if `x` and `y`
                            have a different number of rows, or if `self.input_type` is not supported.
        """
        if y is None:
            raise ValueError(
                "MembershipInferenceBlackBox requires true labels `y`.")

        # Only validate the feature dimension when the estimator exposes an input shape
        if self.estimator.input_shape is not None:
            if self.estimator.input_shape[0] != x.shape[1]:
                raise ValueError(
                    "Shape of x does not match input_shape of classifier")

        y = check_and_transform_label_format(y,
                                             len(np.unique(y)),
                                             return_one_hot=True)

        if y.shape[0] != x.shape[0]:
            raise ValueError("Number of rows in x and y do not match")

        # Build the attack model's input features from the target estimator
        if self.input_type == "prediction":
            features = self.estimator.predict(x).astype(np.float32)
        elif self.input_type == "loss":
            features = self.estimator.loss(x, y).astype(np.float32).reshape(
                -1, 1)
        else:
            # Fail with a clear message instead of an unbound-variable error further down
            raise ValueError(
                "Unsupported input_type: expected 'prediction' or 'loss'.")

        if self.default_model and self.attack_model_type == "nn":
            import torch  # lgtm [py/repeated-import]
            from torch.utils.data import DataLoader  # lgtm [py/repeated-import]

            self.attack_model.eval()
            inferred = None
            test_set = self._get_attack_dataset(f_1=features, f_2=y)
            # Do not shuffle at inference time: the output must stay aligned with the rows of `x`
            test_loader = DataLoader(test_set,
                                     batch_size=self.batch_size,
                                     shuffle=False,
                                     num_workers=0)
            for input1, input2, _ in test_loader:
                outputs = self.attack_model(input1, input2)
                predicted = torch.round(outputs)
                if inferred is None:
                    inferred = predicted.detach().numpy()
                else:
                    inferred = np.vstack(
                        (inferred, predicted.detach().numpy()))
            # `np.int` was removed from NumPy; the builtin `int` is the equivalent dtype
            inferred = inferred.reshape(-1).astype(int)
        else:
            # Non-default attack models return per-class scores; take the most likely class per record
            inferred = np.array([
                np.argmax(arr)
                for arr in self.attack_model.predict(np.c_[features, y])
            ])
        return inferred
    def generate(self, x, y=None, **kwargs):
        """
        Run the attack over `x` batch by batch and return the adversarial samples.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). When omitted for an untargeted attack, the classifier's own predictions are used.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        if y is None:
            # A targeted attack cannot fall back to model predictions
            if self.targeted:
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack.")
            y = get_labels_np_array(
                self.classifier.predict(x, batch_size=self.batch_size))

        # Attack each batch independently and stitch the results back together
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        adv_batches = []
        for batch_id in range(nb_batches):
            logger.debug("Processing batch %i out of %i", batch_id, nb_batches)

            start = batch_id * self.batch_size
            stop = start + self.batch_size
            adv_batches.append(self._generate_batch(x[start:stop], y[start:stop]))
        x_adv = np.vstack(adv_batches)

        # Keep the result inside the classifier's valid input range, when one is defined
        clip_values = getattr(self.classifier, "clip_values", None)
        if clip_values is not None:
            clip_min, clip_max = clip_values
            np.clip(x_adv, clip_min, clip_max, out=x_adv)

        success_rate = 100 * compute_success(
            self.classifier, x, y, x_adv, self.targeted, batch_size=self.batch_size)
        logger.info("Success rate of ZOO attack: %.2f%%", success_rate)

        return x_adv
示例#20
0
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership of input `x` in estimator's training data.

        An untargeted HopSkipJump attack is run on `x`; the L2 distance between each sample and its adversarial
        counterpart is compared against `self.distance_threshold_tau`.

        :param x: Input data.
        :param y: True labels for `x`.

        :Keyword Arguments for HopSkipJump:
            * *norm*: Order of the norm. Possible values: "inf", np.inf or 2.
            * *max_iter*: Maximum number of iterations.
            * *max_eval*: Maximum number of evaluations for estimating gradient.
            * *init_eval*: Initial number of evaluations for estimating gradient.
            * *init_size*: Maximum number of trials for initial generation of adversarial examples.
            * *verbose*: Show progress bars.

        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
        :raises ValueError: If `y` is missing, if no distance threshold has been set, or if `classifier` or
                            `targeted` is passed in `kwargs`.
        """
        from art.attacks.evasion.hop_skip_jump import HopSkipJump

        if y is None:
            raise ValueError(
                "Argument `y` is None, but this attack requires true labels `y` to be provided."
            )

        if self.distance_threshold_tau is None:
            # Adjacent literals are concatenated verbatim, so each fragment carries its own separating space
            raise ValueError(
                "No value for distance threshold `distance_threshold_tau` provided. Please set "
                "`distance_threshold_tau` or run method `calibrate_distance_threshold` on known training and test "
                "dataset.")

        # These two arguments are fixed below and must not be overridden by the caller
        if "classifier" in kwargs:
            raise ValueError(
                "Keyword `classifier` in kwargs is not supported.")

        if "targeted" in kwargs:
            raise ValueError("Keyword `targeted` in kwargs is not supported.")

        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        hsj = HopSkipJump(classifier=self.estimator, targeted=False, **kwargs)
        x_adv = hsj.generate(x=x, y=y)

        # Per-sample L2 norm of the adversarial perturbation
        distance = np.linalg.norm((x_adv - x).reshape((x.shape[0], -1)),
                                  ord=2,
                                  axis=1)

        y_pred = self.estimator.predict(x=x)

        # Samples the estimator already misclassifies are assigned distance 0
        distance[np.argmax(y_pred, axis=1) != np.argmax(y, axis=1)] = 0

        is_member = np.where(distance > self.distance_threshold_tau, 1, 0)

        return is_member
    def generate(self, x, y=None, **kwargs):
        """
        Run the attack over `x` batch by batch and return the adversarial samples.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())
        x_adv = x.astype(ART_NUMPY_DTYPE)

        if y is None:
            # A targeted attack cannot fall back to model predictions
            if self.targeted:
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack.")
            y = get_labels_np_array(
                self.classifier.predict(x, batch_size=self.batch_size))

        # Overwrite x_adv in place, one batch at a time
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in range(nb_batches):
            logger.debug("Processing batch %i out of %i", batch_id, nb_batches)

            window = slice(batch_id * self.batch_size,
                           (batch_id + 1) * self.batch_size)
            x_adv[window] = self._generate_batch(x_adv[window], y[window])

        # Keep the result inside the classifier's valid input range, when one is defined
        clip_values = getattr(self.classifier, "clip_values", None)
        if clip_values is not None:
            x_adv = np.clip(x_adv, clip_values[0], clip_values[1])

        success_rate = 100 * compute_success(
            self.classifier, x, y, x_adv, self.targeted, batch_size=self.batch_size)
        logger.info("Success rate of EAD attack: %.2f%%", success_rate)

        return x_adv
    def infer(self, x: Optional[np.ndarray], y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Infer training samples by repeatedly updating the inputs along the estimator's class gradients.

        :param x: An array with the initial input to the victim classifier. If `None`, then initial input will be
                  initialized as zero array.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `None`, the estimator's predictions on `x` are used.
        :return: The inferred training samples.
        """
        if x is None and y is None:
            raise ValueError("Either `x` or `y` should be provided.")

        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        if x is None:
            x = np.zeros((len(y),) + self.estimator.input_shape)

        if y is None:
            y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))

        x_infer = x.astype(ART_NUMPY_DTYPE)
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))

        # Compute inversions with implicit batching
        for batch_id in trange(nb_batches, desc="Model inversion", disable=not self.verbose):
            start = batch_id * self.batch_size
            stop = start + self.batch_size
            batch = x_infer[start:stop]
            batch_labels = y[start:stop]

            # Mask of samples still being updated, and a rolling window of their most recent costs
            active = np.ones(len(batch), dtype=bool)
            window = np.full((len(batch), self.window_length), np.inf)

            i = 0
            while i < self.max_iter and np.any(active):
                grads = self.estimator.class_gradient(batch[active], np.argmax(batch_labels[active], axis=1))
                # Drop the singleton class axis returned for a single requested label
                grads = np.reshape(grads, (grads.shape[0],) + grads.shape[2:])
                batch[active] = batch[active] + self.learning_rate * grads

                if self.estimator.clip_values is not None:
                    clip_min, clip_max = self.estimator.clip_values
                    batch[active] = np.clip(batch[active], clip_min, clip_max)

                # Cost is one minus the estimator's output for each sample's target class
                target_scores = self.estimator.predict(batch)[
                    np.arange(len(batch)), np.argmax(batch_labels, axis=1)
                ]
                cost = 1 - target_scores
                active = np.logical_or(cost <= self.threshold, cost >= np.max(window, axis=1))

                window[:, i % self.window_length] = cost
                i += 1

            x_infer[start:stop] = batch

        return x_infer
示例#23
0
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate an adversarial example for a single input sample. The sample is replicated `self.batch_size` times
        with Gaussian noise, which requires a lot of memory, so only a batch of size 1 is accepted.

        :param x: An array of a single original input sample.
        :param y: An array of a single target label. Providing it switches the attack to targeted mode.
        :return: An array with the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        if y is not None:
            # Explicit labels are interpreted as targets
            self.targeted = True
        else:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError("Target labels `y` need to be provided for a targeted attack.")

            logger.info("Using model predictions as correct labels for FGM.")
            y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))

        if x.shape[0] > 1 or y.shape[0] > 1:
            raise ValueError("This attack only accepts a single sample as input.")

        if x.ndim != 4:
            raise ValueError("Unrecognized input dimension. Shadow Attack can only be applied to image data.")

        # Noisy copies of the single sample, with the label repeated to match
        x = x.astype(ART_NUMPY_DTYPE)
        x_batch = np.repeat(x, repeats=self.batch_size, axis=0).astype(ART_NUMPY_DTYPE)
        noise = np.random.normal(scale=self.sigma, size=x_batch.shape).astype(ART_NUMPY_DTYPE)
        x_batch = x_batch + noise
        y_batch = np.repeat(y, repeats=self.batch_size, axis=0)

        # Random starting perturbation, shifted by half the width of the clip range
        clip_min = self.estimator.clip_values[0]
        clip_max = self.estimator.clip_values[1]
        perturbation = (
            np.random.uniform(low=clip_min, high=clip_max, size=x.shape).astype(ART_NUMPY_DTYPE)
            - (clip_max - clip_min) / 2
        )

        # -1 for a targeted attack, +1 otherwise
        direction = 1 - 2 * int(self.targeted)

        for _ in trange(self.nb_steps, desc="Shadow attack", disable=not self.verbose):
            batch_gradients = self.estimator.loss_gradient(x=x_batch + perturbation, y=y_batch, sampling=False)
            # Average the loss gradient over all noisy copies
            gradients_ce = np.mean(batch_gradients * direction, axis=0, keepdims=True)
            gradients = gradients_ce - self._get_regularisation_loss_gradients(perturbation)
            perturbation += self.learning_rate * gradients

        x_adv = np.clip(x + perturbation, a_min=clip_min, a_max=clip_max).astype(ART_NUMPY_DTYPE)

        return x_adv
    def fit(self,
            x: np.ndarray,
            y: np.ndarray,
            batch_size: int = 128,
            nb_epochs: int = 10,
            **kwargs) -> None:
        """
        Train the classifier on `(x, y)` by running the training op over shuffled mini-batches.

        :param x: Training data.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
                  shape (nb_samples,).
        :param batch_size: Size of batches.
        :param nb_epochs: Number of epochs to use for training.
        :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for
               TensorFlow and providing it takes no effect.
        """
        # Training needs both the train op and the labels placeholder
        if self._train is None or self._labels_ph is None:
            raise ValueError(
                "Need the training objective and the output placeholder to train the model."
            )

        y = check_and_transform_label_format(y, self.nb_classes)

        x_preprocessed, y_preprocessed = self._apply_preprocessing(x, y, fit=True)

        # Collapse labels to class indices when configured to do so
        if self._reduce_labels:
            y_preprocessed = np.argmax(y_preprocessed, axis=1)

        nb_samples = len(x_preprocessed)
        num_batch = int(np.ceil(nb_samples / float(batch_size)))
        ind = np.arange(nb_samples)

        for _ in range(nb_epochs):
            # New sample order every epoch
            random.shuffle(ind)

            for m in range(num_batch):
                batch_ind = ind[m * batch_size:(m + 1) * batch_size]

                # Batch tensors first, then any extra entries from the stored feed dict
                feed_dict = {
                    self._input_ph: x_preprocessed[batch_ind],
                    self._labels_ph: y_preprocessed[batch_ind],
                }
                feed_dict.update(self._feed_dict)

                self._sess.run(self._train, feed_dict=feed_dict)
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """
        Optimise the adversarial patch on `x` and return it together with its mask.

        :param x: An array with the original input images of shape NHWC or NCHW or input videos of shape NFHWC or NFCHW.
        :param y: An array with the original true labels.
        :return: An array with adversarial patch and an array of the patch mask.
        """
        logger.info("Creating adversarial patch.")

        if x.ndim == 2:
            raise ValueError(
                "Feature vectors detected. The adversarial patch can only be applied to data with spatial "
                "dimensions.")

        y_target = check_and_transform_label_format(
            labels=y, nb_classes=self.estimator.nb_classes)

        for _ in trange(self.max_iter, desc="Adversarial Patch Numpy"):
            # Fresh random placement of the current patch on every image
            patched_images, patch_mask_transformed, transforms = self._augment_images_with_random_patch(
                x, self.patch)

            patch_gradients = np.zeros_like(self.patch)
            num_batches = int(math.ceil(x.shape[0] / self.batch_size))

            for i_batch in range(num_batches):
                batch = slice(i_batch * self.batch_size,
                              (i_batch + 1) * self.batch_size)
                gradients = self.estimator.loss_gradient(patched_images[batch],
                                                         y_target[batch])

                # Map each image gradient back into patch coordinates and accumulate.
                # NOTE(review): mask and transform are indexed by position within the batch, not by the
                # image's position in `x` - confirm this is intended when there is more than one batch.
                for i_image in range(gradients.shape[0]):
                    patch_gradients += self._reverse_transformation(
                        gradients[i_image, :, :, :],
                        patch_mask_transformed[i_image, :, :, :],
                        transforms[i_image],
                    )

            # Step on the summed (not averaged) gradient, then keep the patch within the valid range
            self.patch -= patch_gradients * self.learning_rate
            self.patch = np.clip(
                self.patch,
                a_min=self.estimator.clip_values[0],
                a_max=self.estimator.clip_values[1],
            )

        return self.patch, self._get_circular_patch_mask()
    def compute_loss(  # pylint: disable=W0221
            self,
            x: Union[np.ndarray, "torch.Tensor"],
            y: Union[np.ndarray, "torch.Tensor"],
            reduction: str = "none",
            **kwargs) -> Union[np.ndarray, "torch.Tensor"]:
        """
        Compute the loss of the model on `(x, y)`.

        The loss object's `reduction` attribute is overridden for the duration of the call and always restored
        afterwards, even if evaluating the loss raises.

        :param x: Sample input with shape as expected by the model.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices
                  of shape `(nb_samples,)`.
        :param reduction: Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'.
                   'none': no reduction will be applied
                   'mean': the sum of the output will be divided by the number of elements in the output,
                   'sum': the output will be summed.
        :return: The loss values, reduced according to `reduction`. A tensor is returned when `x` is a tensor,
                 otherwise a NumPy array.
        """
        import torch  # lgtm [py/repeated-import]

        self._model.eval()

        y = check_and_transform_label_format(y, self.nb_classes)

        # Apply preprocessing
        x_preprocessed, y_preprocessed = self._apply_preprocessing(x,
                                                                   y,
                                                                   fit=False)

        # Check label shape
        y_preprocessed = self.reduce_labels(y_preprocessed)

        if isinstance(x, torch.Tensor):
            inputs_t = x_preprocessed
            labels_t = y_preprocessed
        else:
            # Convert the inputs to Tensors
            inputs_t = torch.from_numpy(x_preprocessed).to(self._device)
            # Convert the labels to Tensors
            labels_t = torch.from_numpy(y_preprocessed).to(self._device)

        model_outputs = self._model(inputs_t)

        # Temporarily switch the shared loss object to the requested reduction; the `finally` guarantees
        # the previous setting is put back so a failure here cannot leak into later training calls.
        prev_reduction = self._loss.reduction
        self._loss.reduction = reduction
        try:
            loss = self._loss(model_outputs[-1], labels_t)
        finally:
            self._loss.reduction = prev_reduction

        if isinstance(x, torch.Tensor):
            return loss

        return loss.detach().cpu().numpy()
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Optimise the adversarial patch on `x` and return it together with its mask.

        All samples contribute to every update, including a trailing batch smaller than `self.batch_size`.

        :param x: An array with the original inputs. `x` is expected to have spatial dimensions.
        :param y: An array with the original labels to be predicted.
        :return: The adversarial patch and the patch mask. Note that two arrays are returned, although the
                 annotation names a single array.
        :raises ValueError: If `x` is a 2-dimensional array of feature vectors.
        """
        logger.info("Creating adversarial patch.")

        if len(x.shape) == 2:
            raise ValueError(
                "Feature vectors detected. The adversarial patch can only be applied to data with spatial "
                "dimensions.")

        y_target = check_and_transform_label_format(
            labels=y, nb_classes=self.estimator.nb_classes)

        for _ in trange(self.max_iter, desc="Adversarial patch"):
            # Fresh random placement of the current patch on every image
            patched_images, patch_mask_transformed, transforms = self._augment_images_with_random_patch(
                x, self.patch)

            # Round up so that a final, smaller batch is processed instead of being silently dropped
            num_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
            patch_gradients = np.zeros_like(self.patch)

            for i_batch in range(num_batches):
                i_batch_start = i_batch * self.batch_size
                i_batch_end = (i_batch + 1) * self.batch_size

                gradients = self.estimator.loss_gradient(
                    patched_images[i_batch_start:i_batch_end],
                    y_target[i_batch_start:i_batch_end],
                )

                # Iterate over the samples actually present in this batch (the last one may be short).
                # NOTE(review): mask and transform are indexed by position within the batch, not by the
                # image's position in `x` - confirm this is intended when there is more than one batch.
                for i_image in range(gradients.shape[0]):
                    patch_gradients_i = self._reverse_transformation(
                        gradients[i_image, :, :, :],
                        patch_mask_transformed[i_image, :, :, :],
                        transforms[i_image],
                    )
                    patch_gradients += patch_gradients_i

            # Step on the summed (not averaged) gradient, then keep the patch within the valid range
            self.patch -= patch_gradients * self.learning_rate
            self.patch = np.clip(
                self.patch,
                a_min=self.estimator.clip_values[0],
                a_max=self.estimator.clip_values[1],
            )

        return self.patch, self._get_circular_patch_mask()
示例#28
0
    def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: int = 10, **kwargs) -> None:
        """
        Train the classifier on `(x, y)` with shuffled mini-batches.

        :param x: Training data.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
                  shape (nb_samples,).
        :param batch_size: Size of batches.
        :param nb_epochs: Number of epochs to use for training.
        :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch
               and providing it takes no effect.
        :return: Despite the `None` annotation, a pair `(targets, predictions)` taken from the last processed batch:
                 the batch labels and the argmax over axis 1 of the first model output.
        """
        import torch  # lgtm [py/repeated-import]

        if self._optimizer is None:
            raise ValueError("An optimizer is needed to train the model, but none for provided.")

        y = check_and_transform_label_format(y, self.nb_classes)

        x_preprocessed, y_preprocessed = self._apply_preprocessing(x, y, fit=True)

        # Bring the labels into the form expected by the loss
        y_preprocessed = self.reduce_labels(y_preprocessed)

        nb_samples = len(x_preprocessed)
        num_batch = int(np.ceil(nb_samples / float(batch_size)))
        ind = np.arange(nb_samples)

        for _ in range(nb_epochs):
            # New sample order every epoch
            random.shuffle(ind)

            for m in range(num_batch):
                batch_ind = ind[m * batch_size : (m + 1) * batch_size]
                i_batch = torch.from_numpy(x_preprocessed[batch_ind]).to(self._device)
                o_batch = torch.from_numpy(y_preprocessed[batch_ind]).to(self._device)

                # Standard optimisation step: reset gradients, forward, loss on the last output, backward, update
                self._optimizer.zero_grad()
                model_outputs = self._model(i_batch)
                loss = self._loss(model_outputs[-1], o_batch)
                loss.backward()
                self._optimizer.step()

        # Both names below are only bound once at least one batch has been processed
        targets = o_batch.detach().cpu().numpy()
        predictions = torch.argmax(model_outputs[0].detach(), axis=1).cpu().numpy()
        return targets, predictions
示例#29
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial examples, processing the input in batches through a PyTorch data loader.

        :param x: Original input samples representing videos of format NFHWC.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). When omitted for an untargeted attack, the estimator's predictions are used.
        :return: Adversarial examples.
        """
        import torch  # lgtm [py/repeated-import]

        if y is not None:
            y = check_and_transform_label_format(y, self.estimator.nb_classes)
        if y is None:
            if self.targeted:  # pragma: no cover
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack."
                )

            # Use model predictions as true labels
            logger.info("Using model predictions as true labels.")
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        inputs_t = torch.from_numpy(x.astype(ART_NUMPY_DTYPE))
        labels_t = torch.from_numpy(y.astype(ART_NUMPY_DTYPE))
        dataset = torch.utils.data.TensorDataset(inputs_t, labels_t)

        # Order is preserved and no batch is dropped, so batch k maps onto rows [k * batch_size, (k + 1) * batch_size)
        data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                  batch_size=self.batch_size,
                                                  shuffle=False,
                                                  drop_last=False)

        x_adv = x.copy().astype(ART_NUMPY_DTYPE)

        progress = tqdm(data_loader,
                        desc="OverTheAirFlickeringPyTorch - Batches",
                        leave=False,
                        disable=not self.verbose)
        for batch_id, (batch, batch_labels) in enumerate(progress):
            start = batch_id * self.batch_size
            stop = start + self.batch_size
            x_adv[start:stop] = self._generate_batch(batch, batch_labels)

        return x_adv
示例#30
0
    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """
        Train the attack model to predict the attacked feature from the remaining features and the labels.

        :param x: Input to training process. Includes all features used to train the original model.
        :param y: True labels of the features.
        """
        single_index = isinstance(self.attack_feature, int)

        # An integer attack feature must address an existing column of x
        if single_index and self.attack_feature >= x.shape[1]:
            raise ValueError(
                "attack_feature must be a valid index to a feature in x")

        # The attacked column(s) become the attack model's training target
        attacked_feature = x[:, self.attack_feature]
        self._values = get_feature_values(attacked_feature, single_index)
        encode = float_to_categorical if single_index else floats_to_one_hot
        y_ready = check_and_transform_label_format(
            encode(attacked_feature),
            len(np.unique(attacked_feature)),
            return_one_hot=True)
        if y_ready is None:
            raise ValueError("None value detected.")

        # Scale the labels, either into a fixed range or by a constant factor
        if self.scale_range is None:
            normalized_labels = y * self.prediction_normal_factor
        else:
            normalized_labels = minmax_scale(y, feature_range=self.scale_range)
        normalized_labels = check_and_transform_label_format(
            normalized_labels, return_one_hot=True)

        # Attack model input: every feature except the attacked one, followed by the labels
        remaining_features = np.delete(x, self.attack_feature, 1)
        x_train = np.concatenate((remaining_features, normalized_labels),
                                 axis=1).astype(np.float32)

        self.attack_model.fit(x_train, y_ready)