Example #1
def main(args):
    mode = args.mode
    eps = args.eps
    norm = norm_list.get(args.norm)
    p_mod = args.p_mod
    temperature = args.temperature

    (x_train, y_train), (x_test, y_test), min_, max_ = load_data()

    x_train = x_train[:500]
    y_train = y_train[:500]

    model = VGG('VGG16')

    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model,
                                   clip_values=(min_, max_),
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(3, 32, 32),
                                   nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    x = x_train.copy().astype(np.float32)

    nb_instances = len(x)
    pred_y = classifier.predict(x)

    nb_dims = x.shape[1] * x.shape[2] * x.shape[3]
    # Number of pixels to modify per annealing step
    n_mod = int(round(nb_dims * p_mod))

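    # Start from a random perturbation of radius at most eps under the chosen norm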
    v = random_sphere(nb_points=1, nb_dims=nb_dims, radius=eps, norm=norm)
    v = v.reshape(1, x.shape[1], x.shape[2], x.shape[3])
    v = np.array(v, dtype='float32')
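    # Evaluate the initial perturbation and remember the best one seen so far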
    v_max = v.copy()
    current_y = classifier.predict(x + v)
    fooling_rate = loss_fn(pred_y, current_y, mode=mode)
    fooling_rate_max = fooling_rate

    # Simulated annealing: repeatedly mutate random coordinates of the perturbation
    # and accept or reject each candidate with the Metropolis criterion below
    for j in range(len(x) * 1000):
        v_ = v.reshape(nb_dims).copy()
        # Indices of the coordinates to modify
        idx = random.sample(range(len(v_)), n_mod)
        # Replace with samples from a normal distribution:
        #v_[idx] = np.random.normal(loc=0.0, scale=np.std(v_), size=n_mod)
        # Add samples from a normal distribution:
        v_[idx] = v_[idx] + np.random.normal(
            loc=0.0, scale=np.std(v_), size=n_mod)
        # Add samples from a uniform distribution:
        #v_[idx] = v_[idx] + np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)
        # Replace with samples from a uniform distribution:
        #v_[idx] = np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)

        # Project the perturbation back onto the eps-ball of the chosen norm
        v_ = projection(v_.reshape(1, 3, 32, 32), eps, norm)
        # Evaluate the fooling rate of the candidate
        current_y = classifier.predict(x + v_)
        fooling_rate_ = loss_fn(pred_y, current_y, mode=mode)
        # Metropolis acceptance: improvements over the best score are always
        # accepted; worse candidates are accepted with probability
        # exp((fooling_rate_ - fooling_rate_max) / temperature)
        if random.random() < np.exp(
            (fooling_rate_ - fooling_rate_max) / (temperature + 1e-10)):
            print(j, fooling_rate_, fooling_rate_max, temperature,
                  np.linalg.norm(v_))
            v = v_.copy()
            if fooling_rate_max < fooling_rate_:
                fooling_rate_max = fooling_rate_
                v_max = v_.copy()
            fooling_rate = fooling_rate_
        temperature = 0.99 * temperature

    # Compute fooling rate
    adv_x = x + v_max
    plot_image(v_max[0], 'sample1.png')
    plot_image(adv_x[1], 'sample2.png')
    plot_image(x[1], 'sample3.png')
    adv_y = classifier.predict(adv_x)
    fooling_rate = loss_fn(pred_y, adv_y, mode="fool_rate")
    print(fooling_rate)
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        logger.info("Computing universal perturbation based on %s attack.",
                    self.attacker)

        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        if y is None:
            # Use model predictions as true labels
            logger.info("Using model predictions as true labels.")
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        y_index = np.argmax(y, axis=1)

        # Init universal perturbation
        noise = 0
        fooling_rate = 0.0
        nb_instances = len(x)

        # Instantiate the middle attacker
        attacker = self._get_attack(self.attacker, self.attacker_params)

        # Generate the adversarial examples
        nb_iter = 0
        pbar = tqdm(total=self.max_iter,
                    desc="Universal perturbation",
                    disable=not self.verbose)

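        # Iterate until the fooling rate reaches 1 - delta or the max_iter budget is spent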
        while fooling_rate < 1.0 - self.delta and nb_iter < self.max_iter:
            # Go through all the examples randomly
            rnd_idx = random.sample(range(nb_instances), nb_instances)

            # Go through the data set and compute the perturbation increments sequentially
            for j, ex in enumerate(x[rnd_idx]):
                x_i = ex[None, ...]

                current_label = np.argmax(
                    self.estimator.predict(x_i + noise)[0])
                original_label = y_index[rnd_idx][j]

                if current_label == original_label:
                    # Compute adversarial perturbation
                    adv_xi = attacker.generate(x_i + noise, y=y[rnd_idx][[j]])
                    new_label = np.argmax(self.estimator.predict(adv_xi)[0])

                    # If the class has changed, update v
                    if current_label != new_label:
                        noise = adv_xi - x_i

                        # Project on L_p ball
                        noise = projection(noise, self.eps, self.norm)
            nb_iter += 1
            pbar.update(1)

            # Apply attack and clip
            x_adv = x + noise
            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv = np.clip(x_adv, clip_min, clip_max)

            # Compute the error rate
            y_adv = np.argmax(self.estimator.predict(x_adv, batch_size=1),
                              axis=1)
            fooling_rate = np.sum(y_index != y_adv) / nb_instances

        pbar.close()
        self.fooling_rate = fooling_rate
        self.converged = nb_iter < self.max_iter
        self.noise = noise
        logger.info("Success rate of universal perturbation attack: %.2f%%",
                    100 * fooling_rate)

        return x_adv
Example #3
    def generate(self,
                 x,
                 y=None,
                 init_noise=None,
                 restart=-1,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: An array with the original labels to be predicted.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        x = x.astype(ART_NUMPY_DTYPE)
        nb_instances = x.shape[0]
        preds = self.estimator.predict(x, batch_size=self.batch_size)

        if y is None:
            if self.targeted:
                raise ValueError(
                    'Target labels `y` need to be provided for targeted attacks.'
                )
            else:
                # Use model predictions as correct outputs
                logger.info(
                    'Using the model predictions as the correct labels for SimBA.'
                )
                y_i = np.argmax(preds, axis=1)
        else:
            y_i = np.argmax(y, axis=1)

        desired_labels = y_i

        if restart < 0:
            current_labels = np.argmax(preds, axis=1)
            last_probs = preds[(range(nb_instances), desired_labels)]
        else:
            preds = self.estimator.predict(x + init_noise,
                                           batch_size=self.batch_size)
            current_labels = np.argmax(preds, axis=1)
            last_probs = preds[(range(nb_instances), desired_labels)]

        if self.estimator.channels_first:
            nb_channels = x.shape[1]
        else:
            nb_channels = x.shape[3]

        n_dims = np.prod(x[0].shape)

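        # Build the visiting order of coordinates: raw pixels ('px', in diagonal or
        # random order) or a DCT basis ordered from low frequencies ('dct'),
        # cycling until max_iter indices are available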
        if self.attack == 'px':
            if self.order == 'diag':
                indices = self.diagonal_order(x.shape[2],
                                              nb_channels)[:self.max_iter]
            elif self.order == 'random':
                indices = np.random.permutation(n_dims)[:self.max_iter]
            indices_size = len(indices)
            while indices_size < self.max_iter:
                if self.order == 'diag':
                    tmp_indices = self.diagonal_order(x.shape[2], nb_channels)
                elif self.order == 'random':
                    tmp_indices = np.random.permutation(n_dims)
                indices = np.hstack((indices, tmp_indices))[:self.max_iter]
                indices_size = len(indices)
        elif self.attack == 'dct':
            indices = self._block_order(x.shape[2],
                                        nb_channels,
                                        initial_size=self.freq_dim,
                                        stride=self.stride)[:self.max_iter]
            indices_size = len(indices)
            while indices_size < self.max_iter:
                tmp_indices = self._block_order(x.shape[2],
                                                nb_channels,
                                                initial_size=self.freq_dim,
                                                stride=self.stride)
                indices = np.hstack((indices, tmp_indices))[:self.max_iter]
                indices_size = len(indices)
            trans = lambda z: self._block_idct(z, block_size=x.shape[2])

        clip_min = -np.inf
        clip_max = np.inf
        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values

        success_rate = 0.0

        if restart < 0:
            nb_iter = 0
            # Use a zero array rather than the scalar 0 so that the norm
            # computation on the noise below works before the first update
            noise = np.zeros_like(x[0][None, ...])
        else:
            nb_iter = restart
            noise = init_noise

        diff = np.zeros(n_dims)
        while success_rate < 1. - self.delta and nb_iter < self.max_iter:
            if np.random.rand() < 0.5:
                diff[indices[nb_iter]] = self.epsilon
            else:
                diff[indices[nb_iter]] = -self.epsilon

            if (nb_iter + 1) % self.skip == 0:
                if self.attack == 'dct':
                    left_noise = noise + trans(
                        diff.reshape(x[0][None, ...].shape))
                    left_noise = projection(left_noise, self.eps, self.norm)
                elif self.attack == 'px':
                    left_noise = noise + diff.reshape(x[0][None, ...].shape)
                    left_noise = projection(left_noise, self.eps, self.norm)

                left_preds = self.estimator.predict(np.clip(
                    x + left_noise, clip_min, clip_max),
                                                    batch_size=self.batch_size)
                left_probs = left_preds[(range(nb_instances), desired_labels)]

                # Note: (2 * int(self.targeted) - 1) would merge the targeted
                # and untargeted branches below
                if self.targeted:
                    if np.sum(left_probs - last_probs) > 0.0:
                        last_probs = left_probs
                        noise = left_noise
                        current_labels = np.argmax(left_preds, axis=1)
                else:
                    if np.sum(left_probs - last_probs) < 0.0:
                        last_probs = left_probs
                        noise = left_noise
                        current_labels = np.argmax(left_preds, axis=1)

                # Compute the error rate
                if self.targeted:
                    success_rate = np.sum(
                        desired_labels == current_labels) / nb_instances
                else:
                    success_rate = np.sum(
                        desired_labels != current_labels) / nb_instances

                diff = np.zeros(n_dims)

            nb_iter = nb_iter + 1

            if nb_iter % (self.skip * 10) == 0:
                val_norm = np.linalg.norm(noise.flatten(), ord=self.norm)
                logger.info(
                    'Success rate of Universal SimBA (%s) %s attack at %d iterations: %.2f%% (L%s norm of noise: %.2f)',
                    self.attack, ['non-targeted', 'targeted'][self.targeted],
                    nb_iter, 100 * success_rate, str(self.norm), val_norm)

        logger.info(
            'Final success rate of Universal SimBA (%s) %s attack: %.2f%%',
            self.attack, ['non-targeted',
                          'targeted'][self.targeted], 100 * success_rate)
        return x + noise
Example #4
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        x = x.astype(ART_NUMPY_DTYPE)

        if self.estimator.channels_first:
            nb_channels = x.shape[1]
            nb_xdim = x.shape[2]
            nb_ydim = x.shape[3]
        else:
            nb_channels = x.shape[3]
            nb_xdim = x.shape[1]
            nb_ydim = x.shape[2]

        if nb_xdim != nb_ydim:
            raise ValueError('Input images must be square.')

        clip_min = -np.inf
        clip_max = np.inf
        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values

        # Init
        noise = np.zeros((1, nb_xdim, nb_ydim, nb_channels))
        if self.estimator.channels_first:
            noise = noise.transpose(0, 3, 1, 2)
        fooling_rate = 0.0
        max_fooling_rate = 0.0
        nb_instances = len(x)

        # get the labels
        if y is None:
            # use the predicted labels
            logger.info("Using model predictions as the correct labels.")
            pred_y = self.estimator.predict(x, batch_size=self.batch_size)
        else:
            # use the actual labels
            pred_y = y
        
        correct_y_max = np.argmax(pred_y, axis=1)

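        # Sweep the 2-D Fourier basis in block_size steps; keep the single basis
        # perturbation that achieves the highest fooling rate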
        nb_blocks = int(nb_xdim / self.block_size)
        for i in range(nb_blocks):
            for j in range(nb_blocks):
                # get Fourier basis
                xf = np.zeros((nb_xdim, nb_ydim))
                xf[i * self.block_size, j * self.block_size] = 1.0
                Z = ifftn(xf)
                uap_sfa_1 = np.real(Z)

                xf = np.zeros((nb_xdim, nb_ydim))
                xf[nb_xdim - i * self.block_size - 1, nb_ydim - j * self.block_size - 1] = 1.0
                Z = ifftn(xf)
                uap_sfa_2 = np.real(Z)

                uap_sfa = (1 + i) * uap_sfa_1 + (1 - i) * uap_sfa_2

                # generate noise
                tmp_noise = np.zeros((1, nb_xdim, nb_ydim, nb_channels))
                for c in range(nb_channels):
                    tmp_noise[:, :, :, c] = tmp_noise[:, :, :, c] + self.epsilon * uap_sfa
                
                # projection
                tmp_noise = projection(tmp_noise, self.eps, self.norm)
                if self.estimator.channels_first:
                    tmp_noise = tmp_noise.transpose(0, 3, 1, 2)

                # Apply attack and clip
                x_adv = x + tmp_noise
                if self.estimator.clip_values is not None:
                    x_adv = np.clip(x_adv, clip_min, clip_max)

                # Compute the fooling rate
                y_adv = np.argmax(self.estimator.predict(x_adv, batch_size=self.batch_size), axis=1)
                fooling_rate = np.sum(correct_y_max != y_adv) / nb_instances

                if max_fooling_rate < fooling_rate:
                    max_fooling_rate = fooling_rate
                    noise = tmp_noise

            val_norm = np.linalg.norm(noise.flatten(), ord=self.norm)
            logger.info('Success rate of Fourier attack at section %d: %.2f%% (L%s norm of noise: %.2f)', i, 100 * max_fooling_rate, str(self.norm), val_norm)

        self.fooling_rate = max_fooling_rate
        self.noise = noise
        logger.info("Final success rate of Fourier attack: %.2f%%", 100 * max_fooling_rate)

        # generate adversarial examples
        x_adv = x + noise

        return x_adv
    def generate(self, x, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param attacker: Adversarial attack name. Default is 'deepfool'. Supported names: 'carlini', 'deepfool', 'fgsm',
                'newtonfool', 'jsma', 'vat'.
        :type attacker: `str`
        :param attacker_params: Parameters specific to the adversarial attack.
        :type attacker_params: `dict`
        :param delta: desired accuracy
        :type delta: `float`
        :param max_iter: The maximum number of iterations for computing universal perturbation.
        :type max_iter: `int`
        :param eps: Attack step size (input variation)
        :type eps: `float`
        :param norm: Order of the norm. Possible values: np.inf, 1 and 2 (default is np.inf).
        :type norm: `int`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        assert self.set_params(**kwargs)

        # Init universal perturbation
        v = 0
        fooling_rate = 0.0
        nb_instances = len(x)

        # Instantiate the middle attacker and get the predicted labels
        attacker = self._get_attack(self.attacker, self.attacker_params)
        pred_y = self.classifier.predict(x, logits=False)
        pred_y_max = np.argmax(pred_y, axis=1)

        # Start to generate the adversarial examples
        nb_iter = 0
        while fooling_rate < 1. - self.delta and nb_iter < self.max_iter:
            # Go through all the examples randomly
            rnd_idx = random.sample(range(nb_instances), nb_instances)

            # Go through the data set and compute the perturbation increments sequentially
            for j, ex in enumerate(x[rnd_idx]):
                xi = ex[None, ...]

                f_xi = self.classifier.predict(xi + v, logits=True)
                fk_i_hat = np.argmax(f_xi[0])
                fk_hat = np.argmax(pred_y[rnd_idx][j])

                if fk_i_hat == fk_hat:
                    # Compute adversarial perturbation
                    adv_xi = attacker.generate(xi + v)
                    adv_f_xi = self.classifier.predict(adv_xi, logits=True)
                    adv_fk_i_hat = np.argmax(adv_f_xi[0])

                    # If the class has changed, update v
                    if fk_i_hat != adv_fk_i_hat:
                        v += adv_xi - xi

                        # Project on L_p ball
                        v = projection(v, self.eps, self.norm)
            nb_iter += 1

            # Compute the error rate
            adv_x = x + v
            adv_y = np.argmax(self.classifier.predict(adv_x, logits=False),
                              axis=1)
            fooling_rate = np.sum(pred_y_max != adv_y) / nb_instances

        self.fooling_rate = fooling_rate
        self.converged = (nb_iter < self.max_iter)
        self.v = v

        return adv_x
Example #6
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: An array with the original labels to be predicted.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        logger.info('Computing universal perturbation based on %s attack.',
                    self.attacker)

        # Init universal perturbation
        noise = 0
        fooling_rate = 0.0
        nb_instances = len(x)

        # Instantiate the middle attacker and get the predicted labels
        attacker = self._get_attack(self.attacker, self.attacker_params)
        pred_y = self.classifier.predict(x, batch_size=1)
        pred_y_max = np.argmax(pred_y, axis=1)

        # Start to generate the adversarial examples
        nb_iter = 0
        while fooling_rate < 1. - self.delta and nb_iter < self.max_iter:
            # Go through all the examples randomly
            rnd_idx = random.sample(range(nb_instances), nb_instances)

            # Go through the data set and compute the perturbation increments sequentially
            for j, ex in enumerate(x[rnd_idx]):
                x_i = ex[None, ...]

                current_label = np.argmax(
                    self.classifier.predict(x_i + noise)[0])
                original_label = np.argmax(pred_y[rnd_idx][j])

                if current_label == original_label:
                    # Compute adversarial perturbation
                    adv_xi = attacker.generate(x_i + noise)
                    new_label = np.argmax(self.classifier.predict(adv_xi)[0])

                    # If the class has changed, update v
                    if current_label != new_label:
                        noise = adv_xi - x_i

                        # Project on L_p ball
                        noise = projection(noise, self.eps, self.norm)
            nb_iter += 1

            # Apply attack and clip
            x_adv = x + noise
            if hasattr(
                    self.classifier,
                    'clip_values') and self.classifier.clip_values is not None:
                clip_min, clip_max = self.classifier.clip_values
                x_adv = np.clip(x_adv, clip_min, clip_max)

            # Compute the error rate
            y_adv = np.argmax(self.classifier.predict(x_adv, batch_size=1),
                              axis=1)
            fooling_rate = np.sum(pred_y_max != y_adv) / nb_instances

        self.fooling_rate = fooling_rate
        self.converged = nb_iter < self.max_iter
        self.noise = noise
        logger.info('Success rate of universal perturbation attack: %.2f%%',
                    100 * fooling_rate)

        return x_adv
Example #7
    def generate(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: An array with the targeted labels.
        :return: An array holding the adversarial examples.
        """
        logger.info(
            "Computing targeted universal perturbation based on %s attack.",
            self.attacker)

        # Init universal perturbation
        noise = 0
        fooling_rate = 0.0
        targeted_success_rate = 0.0
        nb_instances = len(x)

        # Instantiate the middle attacker and get the predicted labels
        attacker = self._get_attack(self.attacker, self.attacker_params)
        pred_y = self.estimator.predict(x, batch_size=1)
        pred_y_max = np.argmax(pred_y, axis=1)

        # Start to generate the adversarial examples
        nb_iter = 0
        while targeted_success_rate < 1.0 - self.delta and nb_iter < self.max_iter:
            # Go through all the examples randomly
            rnd_idx = random.sample(range(nb_instances), nb_instances)

            # Go through the data set and compute the perturbation increments sequentially
            for j, (ex, ey) in enumerate(zip(x[rnd_idx], y[rnd_idx])):
                x_i = ex[None, ...]
                y_i = ey[None, ...]

                current_label = np.argmax(
                    self.estimator.predict(x_i + noise)[0])
                target_label = np.argmax(y_i)

                if current_label != target_label:
                    # Compute adversarial perturbation
                    adv_xi = attacker.generate(x_i + noise, y=y_i)

                    new_label = np.argmax(self.estimator.predict(adv_xi)[0])

                    # If the class has changed, update v
                    if new_label == target_label:
                        noise = adv_xi - x_i

                        # Project on L_p ball
                        noise = projection(noise, self.eps, self.norm)
            nb_iter += 1

            # Apply attack and clip
            x_adv = x + noise
            if hasattr(
                    self.estimator,
                    "clip_values") and self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv = np.clip(x_adv, clip_min, clip_max)

            # Compute the error rate
            y_adv = np.argmax(self.estimator.predict(x_adv, batch_size=1),
                              axis=1)
            fooling_rate = np.sum(pred_y_max != y_adv) / nb_instances
            targeted_success_rate = np.sum(
                y_adv == np.argmax(y, axis=1)) / nb_instances

        self.fooling_rate = fooling_rate
        self.targeted_success_rate = targeted_success_rate
        self.converged = nb_iter < self.max_iter
        self.noise = noise
        logger.info("Fooling rate of universal perturbation attack: %.2f%%",
                    100 * fooling_rate)
        logger.info(
            "Targeted success rate of universal perturbation attack: %.2f%%",
            100 * targeted_success_rate)

        return x_adv
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        mask = kwargs.get("mask")

        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        if y is None:
            if self.targeted:
                raise ValueError(
                    "Target labels `y` need to be provided for a targeted attack."
                )
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size)).astype(
                    np.int32)

        if self.estimator.nb_classes == 2 and y.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        x_adv = x.astype(ART_NUMPY_DTYPE)

        for _ in trange(max(1, self.nb_random_init),
                        desc="AutoPGD - restart",
                        disable=not self.verbose):
            # Determine correctly predicted samples
            y_pred = self.estimator.predict(x_adv)
            if self.targeted:
                sample_is_robust = np.argmax(y_pred, axis=1) != np.argmax(
                    y, axis=1)
            elif not self.targeted:
                sample_is_robust = np.argmax(y_pred,
                                             axis=1) == np.argmax(y, axis=1)

            if np.sum(sample_is_robust) == 0:
                break

            x_robust = x_adv[sample_is_robust]
            y_robust = y[sample_is_robust]
            x_init = x[sample_is_robust]

            n = x_robust.shape[0]
            m = np.prod(x_robust.shape[1:]).item()
            random_perturbation = (random_sphere(
                n, m, self.eps,
                self.norm).reshape(x_robust.shape).astype(ART_NUMPY_DTYPE))

            x_robust = x_robust + random_perturbation

            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_robust = np.clip(x_robust, clip_min, clip_max)

            perturbation = projection(x_robust - x_init, self.eps, self.norm)
            x_robust = x_init + perturbation

            # Compute perturbation with implicit batching
            for batch_id in trange(
                    int(np.ceil(x_robust.shape[0] / float(self.batch_size))),
                    desc="AutoPGD - batch",
                    leave=False,
                    disable=not self.verbose,
            ):
                self.eta = 2 * self.eps_step
                batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                    batch_id + 1) * self.batch_size
                x_k = x_robust[batch_index_1:batch_index_2].astype(
                    ART_NUMPY_DTYPE)
                x_init_batch = x_init[batch_index_1:batch_index_2].astype(
                    ART_NUMPY_DTYPE)
                y_batch = y_robust[batch_index_1:batch_index_2]

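                # Build the AutoPGD checkpoint schedule: p_0 = 0, p_1 = 0.22,
                # p_{j+1} = p_j + max(p_j - p_{j-1} - 0.03, 0.06), scaled by max_iter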
                p_0 = 0
                p_1 = 0.22
                var_w = [p_0, p_1]

                while True:
                    p_j_p_1 = var_w[-1] + max(var_w[-1] - var_w[-2] - 0.03,
                                              0.06)
                    if p_j_p_1 > 1:
                        break
                    var_w.append(p_j_p_1)

                var_w = [math.ceil(p * self.max_iter) for p in var_w]

                eta = self.eps_step
                self.count_condition_1 = 0

                for k_iter in trange(self.max_iter,
                                     desc="AutoPGD - iteration",
                                     leave=False,
                                     disable=not self.verbose):

                    # Get perturbation, use small scalar to avoid division by 0
                    tol = 10e-8

                    # Get gradient wrt loss; invert it if attack is targeted
                    grad = self.estimator.loss_gradient(
                        x_k, y_batch) * (1 - 2 * int(self.targeted))

                    # Apply norm bound
                    if self.norm in [np.inf, "inf"]:
                        grad = np.sign(grad)
                    elif self.norm == 1:
                        ind = tuple(range(1, len(x_k.shape)))
                        grad = grad / (np.sum(
                            np.abs(grad), axis=ind, keepdims=True) + tol)
                    elif self.norm == 2:
                        ind = tuple(range(1, len(x_k.shape)))
                        grad = grad / (np.sqrt(
                            np.sum(np.square(grad), axis=ind, keepdims=True)) +
                                       tol)
                    assert x_k.shape == grad.shape

                    perturbation = grad

                    if mask is not None:
                        perturbation = perturbation * (
                            mask.astype(ART_NUMPY_DTYPE))

                    # Apply perturbation and clip
                    z_k_p_1 = x_k + eta * perturbation

                    if self.estimator.clip_values is not None:
                        clip_min, clip_max = self.estimator.clip_values
                        z_k_p_1 = np.clip(z_k_p_1, clip_min, clip_max)

                    if k_iter == 0:
                        x_1 = z_k_p_1
                        perturbation = projection(x_1 - x_init_batch, self.eps,
                                                  self.norm)
                        x_1 = x_init_batch + perturbation

                        f_0 = self.estimator.compute_loss(x=x_k,
                                                          y=y_batch,
                                                          reduction="mean")
                        f_1 = self.estimator.compute_loss(x=x_1,
                                                          y=y_batch,
                                                          reduction="mean")

                        self.eta_w_j_m_1 = eta
                        self.f_max_w_j_m_1 = f_0

                        if f_1 >= f_0:
                            self.f_max = f_1
                            self.x_max = x_1
                            self.x_max_m_1 = x_init_batch
                            self.count_condition_1 += 1
                        else:
                            self.f_max = f_0
                            self.x_max = x_k.copy()
                            self.x_max_m_1 = x_init_batch

                        # Settings for next iteration k
                        x_k_m_1 = x_k.copy()
                        x_k = x_1

                    else:
                        perturbation = projection(z_k_p_1 - x_init_batch,
                                                  self.eps, self.norm)
                        z_k_p_1 = x_init_batch + perturbation

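                        # Momentum step: mix the projected gradient step with the
                        # previous iterate (alpha is fixed to 0.75 in AutoPGD)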
                        alpha = 0.75

                        x_k_p_1 = x_k + alpha * (z_k_p_1 - x_k) + (
                            1 - alpha) * (x_k - x_k_m_1)

                        if self.estimator.clip_values is not None:
                            clip_min, clip_max = self.estimator.clip_values
                            x_k_p_1 = np.clip(x_k_p_1, clip_min, clip_max)

                        perturbation = projection(x_k_p_1 - x_init_batch,
                                                  self.eps, self.norm)
                        x_k_p_1 = x_init_batch + perturbation

                        f_k_p_1 = self.estimator.compute_loss(x=x_k_p_1,
                                                              y=y_batch,
                                                              reduction="mean")

                        if f_k_p_1 == 0.0:
                            x_k = x_k_p_1.copy()
                            break

                        if (not self.targeted and f_k_p_1 > self.f_max) or (
                                self.targeted and f_k_p_1 < self.f_max):
                            self.count_condition_1 += 1
                            self.x_max = x_k_p_1
                            self.x_max_m_1 = x_k
                            self.f_max = f_k_p_1

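                        # At each checkpoint, halve the step size if too few steps
                        # improved the loss (condition 1) or nothing changed since
                        # the previous checkpoint (condition 2), and restart from
                        # the best point found so far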
                        if k_iter in var_w:

                            rho = 0.75

                            condition_1 = self.count_condition_1 < rho * (
                                k_iter - var_w[var_w.index(k_iter) - 1])
                            condition_2 = self.eta_w_j_m_1 == eta and self.f_max_w_j_m_1 == self.f_max

                            if condition_1 or condition_2:
                                eta = eta / 2
                                x_k_m_1 = self.x_max_m_1
                                x_k = self.x_max
                            else:
                                x_k_m_1 = x_k
                                x_k = x_k_p_1.copy()

                            self.count_condition_1 = 0
                            self.eta_w_j_m_1 = eta
                            self.f_max_w_j_m_1 = self.f_max

                        else:
                            x_k_m_1 = x_k
                            x_k = x_k_p_1.copy()

                y_pred_adv_k = self.estimator.predict(x_k)
                if self.targeted:
                    sample_is_not_robust_k = np.invert(
                        np.argmax(y_pred_adv_k, axis=1) != np.argmax(y_batch,
                                                                     axis=1))
                elif not self.targeted:
                    sample_is_not_robust_k = np.invert(
                        np.argmax(y_pred_adv_k, axis=1) == np.argmax(y_batch,
                                                                     axis=1))

                x_robust[batch_index_1:batch_index_2][
                    sample_is_not_robust_k] = x_k[sample_is_not_robust_k]

            x_adv[sample_is_robust] = x_robust

        return x_adv
    def _compute(
        self,
        x: np.ndarray,
        x_init: np.ndarray,
        y: np.ndarray,
        mask: Optional[np.ndarray],
        eps: Union[int, float, np.ndarray],
        eps_step: Union[int, float, np.ndarray],
        project: bool,
        random_init: bool,
    ) -> np.ndarray:
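        # Optionally restart from a random point inside the eps-ball (masked if
        # requested), then clip back to the valid input range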
        if random_init:
            n = x.shape[0]
            m = np.prod(x.shape[1:]).item()
            random_perturbation = random_sphere(n, m, eps, self.norm).reshape(
                x.shape).astype(ART_NUMPY_DTYPE)
            if mask is not None:
                random_perturbation = random_perturbation * (
                    mask.astype(ART_NUMPY_DTYPE))
            x_adv = x.astype(ART_NUMPY_DTYPE) + random_perturbation

            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv = np.clip(x_adv, clip_min, clip_max)
        else:
            if x.dtype == object:
                x_adv = x.copy()
            else:
                x_adv = x.astype(ART_NUMPY_DTYPE)

        # Compute perturbation with implicit batching
        for batch_id in range(int(np.ceil(x.shape[0] /
                                          float(self.batch_size)))):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            batch_index_2 = min(batch_index_2, x.shape[0])
            batch = x_adv[batch_index_1:batch_index_2]
            batch_labels = y[batch_index_1:batch_index_2]

            mask_batch = mask
            if mask is not None:
                # Here we need to make a distinction: if the masks are different for each input, we need to index
                # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is.
                if len(mask.shape) == len(x.shape):
                    mask_batch = mask[batch_index_1:batch_index_2]

            # Get perturbation
            perturbation = self._compute_perturbation(batch, batch_labels,
                                                      mask_batch)

            # Compute batch_eps and batch_eps_step
            if isinstance(eps, np.ndarray):
                if len(eps.shape) == len(
                        x.shape) and eps.shape[0] == x.shape[0]:
                    batch_eps = eps[batch_index_1:batch_index_2]
                    batch_eps_step = eps_step[batch_index_1:batch_index_2]

                else:
                    batch_eps = eps
                    batch_eps_step = eps_step

            else:
                batch_eps = eps
                batch_eps_step = eps_step

            # Apply perturbation and clip
            x_adv[batch_index_1:batch_index_2] = self._apply_perturbation(
                batch, perturbation, batch_eps_step)

            if project:
                if x_adv.dtype == object:
                    for i_sample in range(batch_index_1, batch_index_2):
                        if isinstance(
                                batch_eps, np.ndarray
                        ) and batch_eps.shape[0] == x_adv.shape[0]:
                            perturbation = projection(
                                x_adv[i_sample] - x_init[i_sample],
                                batch_eps[i_sample], self.norm)

                        else:
                            perturbation = projection(
                                x_adv[i_sample] - x_init[i_sample], batch_eps,
                                self.norm)

                        x_adv[i_sample] = x_init[i_sample] + perturbation

                else:
                    perturbation = projection(
                        x_adv[batch_index_1:batch_index_2] -
                        x_init[batch_index_1:batch_index_2], batch_eps,
                        self.norm)
                    x_adv[batch_index_1:batch_index_2] = x_init[
                        batch_index_1:batch_index_2] + perturbation

        return x_adv
Example #10
    def generate(self, x, noise=0, y=None, targeted=False, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: An array with the original labels to be predicted.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        logger.info('Computing universal perturbation based on %s attack.',
                    self.attacker)

        # Init universal perturbation (an initial perturbation can be passed in
        # via the `noise` argument)
        fooling_rate = 0.0
        nb_instances = len(x)

        # Instantiate the middle attacker and get the predicted labels
        attacker = self._get_attack(self.attacker, self.attacker_params)
        pred_y = self.classifier.predict(x, batch_size=1)
        pred_y_max = np.argmax(pred_y, axis=1)

        norm2 = 0
        normInf = 0

        # Start to generate the adversarial examples
        nb_iter = 0
        while fooling_rate < 1. - self.delta and nb_iter < self.max_iter:
            # Go through all the examples randomly
            rnd_idx = random.sample(range(nb_instances), nb_instances)

            nb_attack = 0
            # Go through the data set and compute the perturbation increments sequentially
            if not targeted:
                for j, ex in enumerate(x[rnd_idx]):
                    x_i = ex[None, ...]

                    current_label = np.argmax(
                        self.classifier.predict(x_i + noise)[0])
                    original_label = np.argmax(pred_y[rnd_idx][j])

                    if current_label == original_label:
                        # Compute adversarial perturbation
                        adv_xi = attacker.generate(x_i + noise)
                        new_label = np.argmax(
                            self.classifier.predict(adv_xi)[0])
                        nb_attack += 1

                        # If the class has changed, update v
                        if current_label != new_label:
                            noise = adv_xi - x_i

                            # Project on L_p ball
                            noise = projection(noise, self.eps, self.norm)

                            # Track the current L2 / L-inf norms of the noise
                            normTmp = noise.reshape((noise.shape[0], -1))
                            norm2 = np.linalg.norm(normTmp, axis=1)[0]
                            normInf = abs(normTmp).max()

            else:
                for j, (ex, ey) in enumerate(zip(x[rnd_idx], y[rnd_idx])):
                    x_i = ex[None, ...]
                    y_i = ey[None, ...]

                    current_label = np.argmax(
                        self.classifier.predict(x_i + noise)[0])
                    target_label = np.argmax(ey)

                    if current_label != target_label:
                        # Compute adversarial perturbation
                        adv_xi = attacker.generate(x_i + noise, y=y_i)
                        new_label = np.argmax(
                            self.classifier.predict(adv_xi)[0])
                        nb_attack += 1

                        # If the class has changed, update v
                        if new_label == target_label:
                            noise = adv_xi - x_i

                            # Project on L_p ball
                            noise = projection(noise, self.eps, self.norm)

                            # Track the current L2 / L-inf norms of the noise
                            normTmp = noise.reshape((noise.shape[0], -1))
                            norm2 = np.linalg.norm(normTmp, axis=1)[0]
                            normInf = abs(normTmp).max()

            nb_iter += 1

            # Apply attack and clip
            x_adv = x + noise
            if hasattr(
                    self.classifier,
                    'clip_values') and self.classifier.clip_values is not None:
                clip_min, clip_max = self.classifier.clip_values
                x_adv = np.clip(x_adv, clip_min, clip_max)

            # Compute the error rate (this drives the while-loop condition above)
            y_adv = np.argmax(self.classifier.predict(x_adv, batch_size=1),
                              axis=1)
            fooling_rate = np.sum(pred_y_max != y_adv) / nb_instances

        self.fooling_rate = fooling_rate
        self.converged = nb_iter < self.max_iter
        self.noise = noise
        logger.info('Success rate of universal perturbation attack: %.2f%%',
                    100 * fooling_rate)

        return x_adv
    def generate(self, x, y=None):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: The labels for the data `x`. Only provide this parameter if you'd like to use true
                  labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
                  "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
                  Labels should be one-hot-encoded.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """

        from art import NUMPY_DTYPE
        from art.utils import compute_success, get_labels_np_array, projection

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError(
                    'Target labels `y` need to be provided for a targeted attack.'
                )

            # Use model predictions as correct outputs
            targets = get_labels_np_array(self.classifier.predict(x))
        else:
            targets = y

        adv_x_best = None
        rate_best = 0.0

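        # PGD with random restarts: after each step, project the accumulated noise
        # onto the eps-ball and record (adv_x, success rate, iteration, per-sample
        # noise norm) in TrackedPGD.tracked_x for later inspection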
        for i_random_init in range(max(1, self.num_random_init)):
            adv_x = x.astype(NUMPY_DTYPE)
            noise = np.zeros_like(x)
            for i_max_iter in range(self.max_iter):

                adv_x = self._compute(
                    adv_x, targets, self.eps, self.eps_step,
                    self.num_random_init > 0 and i_max_iter == 0)
                if self._project:
                    noise = projection(adv_x - x, self.eps, self.norm)
                    adv_x = x + noise

                rate = 100 * compute_success(self.classifier, x, targets,
                                             adv_x, self.targeted)
                logger.info('Success rate of attack step: %.2f%%', rate)

                noise_norm = 0
                if self.norm == np.inf:
                    ind = tuple(range(1, len(noise.shape)))
                    noise_norm = np.amax(np.abs(noise), axis=ind,
                                         keepdims=True)
                elif self.norm == 1:
                    ind = tuple(range(1, len(noise.shape)))
                    noise_norm = np.sum(np.abs(noise), axis=ind, keepdims=True)
                elif self.norm == 2:
                    ind = tuple(range(1, len(noise.shape)))
                    noise_norm = np.sqrt(
                        np.sum(np.square(noise), axis=ind, keepdims=True))

                TrackedPGD.tracked_x.append(
                    (adv_x, rate, i_max_iter, noise_norm))

            rate = 100 * compute_success(self.classifier, x, targets, adv_x,
                                         self.targeted)
            if rate > rate_best or adv_x_best is None:
                rate_best = rate
                adv_x_best = adv_x

        logger.info('Success rate of attack: %.2f%%', rate_best)

        return adv_x_best