Exemplo n.º 1
0
    def __init__(self,
                 inputs,
                 labels,
                 adv_inputs,
                 adv_preds,
                 targeted=False,
                 target_label=None):
        """
        Validate the evaluation data and record which attacks succeeded.

        Args:
            inputs: Original samples; validated together with `labels`.
            labels: Per-sample label scores of the original samples. The true
                class is taken as the argmax along axis 1.
            adv_inputs: Adversarial samples; validated together with
                `adv_preds`.
            adv_preds: Per-sample prediction scores on the adversarial
                samples. The predicted class is the argmax along axis 1.
            targeted (bool): Whether the attack is targeted. Default: False.
            target_label: Target class of each sample. Must be given when
                `targeted` is True. Default: None.

        Raises:
            ValueError: If `targeted` is True and `target_label` is None.
        """
        checked_inputs, checked_labels = check_pair_numpy_param(
            'inputs', inputs, 'labels', labels)
        self._inputs = checked_inputs
        self._labels = checked_labels

        checked_advs, checked_adv_preds = check_pair_numpy_param(
            'adv_inputs', adv_inputs, 'adv_preds', adv_preds)
        self._adv_inputs = checked_advs
        self._adv_preds = checked_adv_preds

        self._targeted = check_param_type('targeted', targeted, bool)

        if target_label is not None:
            target_label = check_numpy_param('target_label', target_label)
        self._target_label = target_label

        # Class indices of the clean labels and of the adversarial predictions.
        self._true_label = np.argmax(self._labels, axis=1)
        self._adv_label = np.argmax(self._adv_preds, axis=1)

        sample_ids = np.arange(self._adv_preds.shape[0])
        if not self._targeted:
            # Untargeted: success means the predicted class moved away from
            # the true class.
            self._success_idxes = sample_ids[
                self._adv_label != self._true_label]
            return

        if target_label is None:
            msg = 'targeted attack need target_label, but got None.'
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
        self._adv_preds, self._target_label = check_pair_numpy_param(
            'adv_pred', self._adv_preds, 'target_label', target_label)
        # Targeted: success means the predicted class equals the target.
        self._success_idxes = sample_ids[
            self._adv_label == self._target_label]
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and origin/target labels.

        The perturbation is accumulated with a momentum term over
        `self._nb_iter` iterations and kept within `self._eps` of the
        original samples.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (Union[numpy.ndarray, tuple]): Original/target labels.
                For each input if it has more than one label, it is wrapped in a tuple.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.5, 0.2, 0.6],
            >>>                          [0.3, 0, 0.2]],
            >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            >>>                          [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])
        """
        if isinstance(labels, tuple):
            for i, labels_item in enumerate(labels):
                inputs, _ = check_pair_numpy_param('inputs', inputs,
                                                   'labels[{}]'.format(i),
                                                   labels_item)
        else:
            inputs, _ = check_pair_numpy_param('inputs', inputs,
                                               'labels', labels)
        arr_x = inputs
        momentum = 0
        # The previous test `'self._prob' in globals()` was always False
        # (globals() maps module-level names, never "self.<attr>"), so the
        # input transform could never run. Look the attribute up on the
        # instance instead; instances without `_prob` keep the plain path.
        prob = getattr(self, '_prob', None)
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            clip_diff = clip_max - clip_min
            for _ in range(self._nb_iter):
                if prob is not None:
                    d_inputs = _transform_inputs(inputs, prob)
                else:
                    d_inputs = inputs
                gradient = self._gradient(d_inputs, labels)
                momentum = self._decay_factor * momentum + gradient
                # NOTE(review): the step is applied to the (possibly
                # transformed) d_inputs, as in the original code -- confirm
                # this is intended rather than stepping from `inputs`.
                adv_x = d_inputs + self._eps_iter * np.sign(momentum)
                # Limit the total perturbation, scaled by the value range.
                perturs = np.clip(adv_x - arr_x, (0 - self._eps) * clip_diff,
                                  self._eps * clip_diff)
                adv_x = arr_x + perturs
                adv_x = np.clip(adv_x, clip_min, clip_max)
                inputs = adv_x
        else:
            for _ in range(self._nb_iter):
                if prob is not None:
                    d_inputs = _transform_inputs(inputs, prob)
                else:
                    d_inputs = inputs
                gradient = self._gradient(d_inputs, labels)
                momentum = self._decay_factor * momentum + gradient
                adv_x = d_inputs + self._eps_iter * np.sign(momentum)
                adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
                inputs = adv_x
        return adv_x
    def generate(self, inputs, labels):
        """
        Iteratively generate adversarial examples, projecting the accumulated
        perturbation with `_projection` (parameter norm_level) every step.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (Union[numpy.ndarray, tuple]): Original/target labels.
                For each input if it has more than one label, it is wrapped in a tuple.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.6, 0.2, 0.6],
            >>>                          [0.3, 0.3, 0.4]],
            >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            >>>                          [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
        """
        if not isinstance(labels, tuple):
            inputs, _ = check_pair_numpy_param('inputs', inputs,
                                               'labels', labels)
        else:
            for pos, single_labels in enumerate(labels):
                inputs, _ = check_pair_numpy_param(
                    'inputs', inputs, 'labels[{}]'.format(pos), single_labels)

        origin = inputs
        bounded = self._bounds is not None
        if bounded:
            clip_min, clip_max = self._bounds
            value_span = clip_max - clip_min

        for _ in range(self._nb_iter):
            candidate = self._attack.generate(inputs, labels)
            delta = _projection(candidate - origin,
                                self._eps,
                                norm_level=self._norm_level)
            if bounded:
                # With bounds the perturbation limit is scaled by the range.
                delta = np.clip(delta, (0 - self._eps) * value_span,
                                self._eps * value_span)
                adv_x = origin + delta
            else:
                adv_x = origin + delta
                adv_x = np.clip(adv_x, origin - self._eps, origin + self._eps)
            # The next iteration starts from the current adversarial sample.
            inputs = adv_x
        return adv_x
Exemplo n.º 4
0
    def generate(self, inputs, labels):
        """
        Build adversarial examples with a single gradient step.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        self._dtype = inputs.dtype
        gradient = self._gradient(inputs, labels)

        if self._alpha is None:
            perturbation = self._eps * gradient
        else:
            # Random start: alpha of the budget goes to a random sign pattern,
            # the remainder to the gradient direction.
            noise_sign = np.sign(np.random.normal(size=inputs.shape))
            random_part = self._alpha * noise_sign.astype(self._dtype)
            perturbation = (self._eps - self._alpha) * gradient + random_part

        if self._bounds is None:
            return inputs + perturbation

        clip_min, clip_max = self._bounds
        # With bounds the step is expressed relative to the value range.
        scaled = perturbation * (clip_max - clip_min)
        return np.clip(inputs + scaled, clip_min, clip_max)
Exemplo n.º 5
0
    def __init__(self, raw_preds, def_preds, raw_query_counts,
                 def_query_counts, raw_query_time, def_query_time,
                 def_detection_counts, true_labels, max_queries):
        """
        Validate and store the statistics of a black-box defense evaluation.

        Args:
            raw_preds: Predictions of the raw model; validated together with
                `def_preds`.
            def_preds: Predictions of the defended model.
            raw_query_counts: Query counts on the raw model. Samples with a
                count greater than 0 are counted as adversarial samples.
            def_query_counts: Query counts on the defended model.
            raw_query_time: Query time on the raw model.
            def_query_time: Query time on the defended model.
            def_detection_counts: Detection counts of the defended model.
            true_labels: Ground-truth labels.
            max_queries: Query budget; validated by `check_int_positive`.
        """
        self._raw_preds, self._def_preds = check_pair_numpy_param(
            'raw_preds', raw_preds, 'def_preds', def_preds)
        self._num_samples = self._raw_preds.shape[0]

        # Every per-sample statistic is length-checked against the
        # predictions, in the same order as the constructor arguments.
        per_sample_stats = (
            ('_raw_query_counts', 'raw_query_counts', raw_query_counts),
            ('_def_query_counts', 'def_query_counts', def_query_counts),
            ('_raw_query_time', 'raw_query_time', raw_query_time),
            ('_def_query_time', 'def_query_time', def_query_time),
        )
        for attr_name, arg_name, arg_value in per_sample_stats:
            checked, _ = check_equal_length(arg_name, arg_value,
                                            'number of sample',
                                            self._raw_preds)
            setattr(self, attr_name, checked)

        was_attacked = self._raw_query_counts > 0
        n_attacked = self._raw_query_counts[was_attacked].shape[0]
        self._num_adv_samples = check_int_positive(
            'the number of adversarial samples', n_attacked)

        self._num_ben_samples = self._num_samples - self._num_adv_samples
        self._max_queries = check_int_positive('max_queries', max_queries)

        self._def_detection_counts = check_numpy_param('def_detection_counts',
                                                       def_detection_counts)
        self._true_labels = check_numpy_param('true_labels', true_labels)
Exemplo n.º 6
0
    def batch_defense(self, inputs, labels, batch_size=32, epochs=5):
        """
        Run `defense` over the samples batch by batch for several epochs.

        Only complete batches are used: trailing samples that do not fill a
        whole batch are skipped in every epoch.

        Args:
            inputs (numpy.ndarray): Samples based on which adversarial
                examples are generated.
            labels (numpy.ndarray): Labels of input samples.
            batch_size (int): Number of samples in one batch.
            epochs (int): Number of epochs.

        Returns:
            numpy.ndarray, loss of the last `defense` call, or None when no
            complete batch was processed.

        Raises:
            ValueError: If batch_size is 0.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        x_len = len(inputs)
        batch_size = check_int_positive('batch_size', batch_size)

        iters_per_epoch = int(x_len / batch_size)
        # The batch windows are identical in every epoch, so build them once.
        windows = [slice(step * batch_size, (step + 1) * batch_size)
                   for step in range(iters_per_epoch)]

        loss = None
        for _ in range(epochs):
            for window in windows:
                loss = self.defense(inputs[window], labels[window])
        return loss
    def defense(self, inputs, labels):
        """
        Enhance model via training with adversarial examples generated from input samples.

        A fraction `self._replace_ratio` of the samples is replaced by
        adversarial examples, split evenly between the configured attacks,
        and the network is trained once on the mixed batch. The `inputs`
        array passed by the caller is not modified.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Labels of input samples.

        Returns:
            numpy.ndarray, loss of adversarial defense operation.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                labels)
        if not self._graph_initialized:
            self._train_net(Tensor(inputs), Tensor(labels))
            self._graph_initialized = True

        # Work on a private copy. Writing adversarial samples into `inputs`
        # itself would also overwrite the caller's data whenever a view is
        # passed in (e.g. a batch slice), so later epochs would start from
        # already-perturbed samples.
        mixed_inputs = np.array(inputs)

        x_len = inputs.shape[0]
        n_adv = int(np.ceil(self._replace_ratio*x_len))
        n_adv_per_attack = int(n_adv / len(self._attacks))

        # Distinct random positions; each attack gets a consecutive chunk.
        adv_ids = np.random.choice(x_len, size=n_adv, replace=False)
        start = 0
        for attack in self._attacks:
            idx = adv_ids[start:start + n_adv_per_attack]
            mixed_inputs[idx] = attack.generate(mixed_inputs[idx], labels[idx])
            start += n_adv_per_attack

        loss = self._train_net(Tensor(mixed_inputs), Tensor(labels))
        return loss.asnumpy()
Exemplo n.º 8
0
    def _dist(self, before, after):
        """
        Measure, per sample, how far the model output moved after smoothing.

        Args:
            before (numpy.ndarray): Model output of raw samples.
            after (numpy.ndarray): Model output of smoothed counterparts.

        Returns:
            list, one norm value per sample. The order-2 norm is used when
            `self._metric` is 'l2'; every other metric value uses order 1.
        """
        before, after = check_pair_numpy_param('before', before,
                                               'after', after)
        before, after = check_equal_shape('before', before, 'after', after)
        # 'l1' and any unrecognised metric both map to order 1.
        norm_order = 2 if self._metric == 'l2' else 1
        return [np.linalg.norm(sample_diff, ord=norm_order)
                for sample_diff in after - before]
Exemplo n.º 9
0
    def generate(self, inputs, labels):
        """
        Main algorithm for NES.

        Args:
            inputs (numpy.ndarray): Benign input samples.
            labels (numpy.ndarray): Target labels.

        Returns:
            - list, bool values for each attack result.

            - list, generated adversarial examples.

            - list, query times for each sample.

        Raises:
            ValueError: If the top_k less than 1 in Label-Only or Partial-Info setting.
            ValueError: If the target_imgs is None in Label-Only or Partial-Info setting.
            ValueError: If scene is not in ['Label_Only', 'Partial_Info', 'Query_Limit']

        Examples:
            >>> advs = attack.generate([[0.2, 0.3, 0.4], [0.3, 0.3, 0.2]],
            >>> [1, 2])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels', labels)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)

        if self._scene in ('Label_Only', 'Partial_Info'):
            problem = None
            if self._k < 1:
                problem = "In 'Label_Only' or 'Partial_Info' mode, 'top_k' must more than 0."
            elif self.target_imgs is None:
                problem = "In 'Label_Only' or 'Partial_Info' mode, 'target_imgs' must be set."
            if problem is not None:
                LOGGER.error(TAG, problem)
                raise ValueError(problem)
        elif self._scene == 'Query_Limit':
            self._k = self._num_class
        else:
            msg = "scene must be string in 'Label_Only', 'Partial_Info' or 'Query_Limit' "
            LOGGER.error(TAG, msg)
            raise ValueError(msg)

        # NOTE(review): self.target_imgs is iterated in every scene but is
        # only checked for None in the two scenes above -- confirm callers
        # always set it for 'Query_Limit' as well.
        outcomes = [self._generate_one(sample, label, target_img)
                    for sample, label, target_img
                    in zip(inputs, labels, self.target_imgs)]

        is_advs = [outcome[0] for outcome in outcomes]
        advs = [outcome[1] for outcome in outcomes]
        queries = [outcome[2] for outcome in outcomes]
        return is_advs, advs, queries
Exemplo n.º 10
0
    def generate(self, inputs, labels):
        """
        Build adversarial examples with a single gradient step.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (Union[numpy.ndarray, tuple]): Original/target labels.
                For each input if it has more than one label, it is wrapped in a tuple.

        Returns:
            numpy.ndarray, generated adversarial examples.
        """
        if not isinstance(labels, tuple):
            inputs, _ = check_pair_numpy_param('inputs', inputs,
                                               'labels', labels)
        else:
            for pos, single_labels in enumerate(labels):
                inputs, _ = check_pair_numpy_param(
                    'inputs', inputs, 'labels[{}]'.format(pos), single_labels)

        self._dtype = inputs.dtype
        gradient = self._gradient(inputs, labels)

        if self._alpha is None:
            perturbation = self._eps * gradient
        else:
            # Random start: alpha of the budget goes to a random sign pattern,
            # the remainder to the gradient direction.
            noise_sign = np.sign(np.random.normal(size=inputs.shape))
            random_part = self._alpha * noise_sign.astype(self._dtype)
            perturbation = (self._eps - self._alpha) * gradient + random_part

        if self._bounds is None:
            return inputs + perturbation

        clip_min, clip_max = self._bounds
        # With bounds the step is expressed relative to the value range.
        scaled = perturbation * (clip_max - clip_min)
        return np.clip(inputs + scaled, clip_min, clip_max)
Exemplo n.º 11
0
    def defense(self, inputs, labels):
        """
        Run one training step of the network on the given samples.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Labels of input samples.

        Returns:
            numpy.ndarray, loss of defense operation.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        input_tensor = Tensor(inputs)
        label_tensor = Tensor(labels)
        return self._train_net(input_tensor, label_tensor).asnumpy()
Exemplo n.º 12
0
def get_attack_model(features, labels, config, n_jobs=-1):
    """
    Train and return the attack model selected by `config`.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to training set.
        config (dict): Config of attacker, with key in ["method", "params"].
            The format is {"method": "knn", "params": {"n_neighbors": [3, 5, 7]}},
            params of each method must within the range of changeable parameters.
            Tips of params implement can be found in
            "https://scikit-learn.org/0.16/modules/generated/sklearn.grid_search.GridSearchCV.html".
        n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
            otherwise the value of n_jobs must be a positive integer.

    Returns:
        sklearn.BaseEstimator, trained model specify by config["method"].

    Raises:
        ValueError: If n_jobs is neither -1 nor a positive integer.
        NameError: If config["method"] is not one of "knn", "lr", "mlp",
            "rf" (compared case-insensitively).

    Examples:
        >>> features = np.random.randn(10, 10)
        >>> labels = np.random.randint(0, 2, 10)
        >>> config = {"method": "knn", "params": {"n_neighbors": [3, 5, 7]}}
        >>> attack_model = get_attack_model(features, labels, config)
    """
    features, labels = check_pair_numpy_param("features", features,
                                              "labels", labels)
    config = check_param_type("config", config, dict)
    n_jobs = check_param_type("n_jobs", n_jobs, int)
    if n_jobs != -1 and n_jobs <= 0:
        msg = "Value of n_jobs must be -1 or positive integer."
        raise ValueError(msg)

    # Lower-cased method name -> training routine.
    trainers = {
        "knn": _attack_knn,
        "lr": _attack_lr,
        "mlp": _attack_mlpc,
        "rf": _attack_rf,
    }
    trainer = trainers.get(str.lower(config["method"]))
    if trainer is None:
        msg = "Method {} is not supported.".format(config["method"])
        LOGGER.error(TAG, msg)
        raise NameError(msg)

    # Convergence warnings are silenced only while the model is trained.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        return trainer(features, labels, config["params"], n_jobs)
Exemplo n.º 13
0
    def fit(self, inputs, labels=None):
        """
        Search for the largest radius that keeps accuracy acceptable.

        Starting at `self._initial_radius`, the radius grows by
        `self._search_step` while the accuracy of `_rc_forward` stays within
        `self._degrade_limit` of the raw model accuracy. The result is stored
        in `self._radius`.

        Args:
            inputs (numpy.ndarray): Benign samples.
            labels (numpy.ndarray): Ground truth labels of the input samples.
                Default:None.

        Returns:
            float, the best radius.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        LOGGER.debug(TAG, 'enter fit() function.')
        time_start = time.time()

        radius_span = self._max_radius - self._initial_radius
        search_iters = np.round(radius_span / self._search_step).astype(int)
        radius = self._initial_radius

        pred = self._model.predict(Tensor(inputs))
        raw_preds = np.argmax(pred.asnumpy(), axis=1)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        raw_preds, labels = check_equal_shape('raw_preds', raw_preds,
                                              'labels', labels)
        n_samples = inputs.shape[0]
        raw_acc = np.sum(raw_preds == labels) / n_samples

        for _ in range(search_iters):
            rc_preds = self._rc_forward(inputs, radius)
            rc_preds, labels = check_equal_shape('rc_preds', rc_preds,
                                                 'labels', labels)
            def_acc = np.sum(rc_preds == labels) / n_samples
            # Stop as soon as accuracy degrades by more than the limit.
            if not def_acc >= raw_acc - self._degrade_limit:
                break
            radius += self._search_step

        # `radius` is one step past the last accepted value.
        self._radius = radius - self._search_step
        LOGGER.debug(TAG, 'best radius is: %s', self._radius)
        LOGGER.debug(
            TAG, 'time used to train detector of %d samples is: %s seconds',
            inputs.shape[0],
            time.time() - time_start)
        return self._radius
Exemplo n.º 14
0
    def generate(self, inputs, labels):
        """
        Generate adversarial images in a for loop.

        Args:
            inputs (numpy.ndarray): Origin images.
            labels (Union[numpy.ndarray, None]): Target labels. When None,
                the labels are neither validated nor decoded and
                `self.y_targets` keeps its current value.

        Returns:
            - numpy.ndarray, bool values for each attack result.

            - numpy.ndarray, generated adversarial examples.

            - numpy.ndarray, query times for each sample.

        Examples:
            >>> generate([[0.1,0.2,0.2],[0.2,0.3,0.4]],[2,6])
        """
        if labels is not None:
            inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                    labels)
            # Decode one-hot labels only when labels were actually supplied.
            # Previously np.argmax ran on None whenever self._sparse was
            # False, although labels=None is otherwise accepted here.
            if not self._sparse:
                labels = np.argmax(labels, axis=1)
            self.y_targets = labels

        x_adv = []
        is_advs = []
        queries_times = []

        for i, x_single in enumerate(inputs):
            # The query counter is restarted for every sample.
            self.queries = 0
            if self.image_targets is not None:
                self.image_target = self.image_targets[i]
            if self.y_targets is not None:
                self.y_target = self.y_targets[i]
            is_adv, adv_img, query_time = self._generate_one(x_single)
            x_adv.append(adv_img)
            is_advs.append(is_adv)
            queries_times.append(query_time)

        return np.asarray(is_advs), \
               np.asarray(x_adv), \
               np.asarray(queries_times)
    def generate(self, inputs, labels):
        """
        Simple iterative FGSM method to generate adversarial examples.

        The wrapped attack `self._attack` is applied `self._nb_iter` times
        and the total perturbation is kept within `self._eps` of the
        original samples.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.3, 0.2, 0.6],
            >>>                          [0.3, 0.2, 0.4]],
            >>>                         [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            >>>                          [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                labels)
        arr_x = inputs
        # The previous test `'self.prob' in globals()` was always False
        # (globals() maps module-level names, never "self.<attr>"), so the
        # input transform could never run. Look the attribute up on the
        # instance instead; instances without `prob` keep the plain path.
        prob = getattr(self, 'prob', None)
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            clip_diff = clip_max - clip_min
            for _ in range(self._nb_iter):
                if prob is not None:
                    d_inputs = _transform_inputs(inputs, prob)
                else:
                    d_inputs = inputs
                adv_x = self._attack.generate(d_inputs, labels)
                # Limit the total perturbation, scaled by the value range.
                perturs = np.clip(adv_x - arr_x, (0 - self._eps) * clip_diff,
                                  self._eps * clip_diff)
                adv_x = arr_x + perturs
                inputs = adv_x
        else:
            for _ in range(self._nb_iter):
                if prob is not None:
                    d_inputs = _transform_inputs(inputs, prob)
                else:
                    d_inputs = inputs
                adv_x = self._attack.generate(d_inputs, labels)
                adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
                inputs = adv_x
        return adv_x
    def generate(self, inputs, labels):
        """
        Attack the samples one by one and collect the per-sample results.

        Args:
            inputs (numpy.ndarray): The original, unperturbed inputs.
            labels (numpy.ndarray): The target labels.

        Returns:
            - numpy.ndarray, bool values for each attack result.

            - numpy.ndarray, generated adversarial examples.

            - numpy.ndarray, query times for each sample.

        Examples:
            >>> adv_list = attack.generate([[0.1, 0.2, 0.6],
            >>>                             [0.3, 0, 0.4]],
            >>>                            [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            >>>                             [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]])
        """
        arr_x, arr_y = check_pair_numpy_param('inputs', inputs,
                                              'labels', labels)
        if not self._sparse:
            arr_y = np.argmax(arr_y, axis=1)

        success_flags = []
        adversarials = []
        query_counts = []
        for sample, label in zip(arr_x, arr_y):
            tick = time.time()
            is_adv, perturbed, query_times = self._generate_one(sample, label)
            success_flags.append(is_adv)
            adversarials.append(perturbed)
            query_counts.append(query_times)
            LOGGER.info(
                TAG, 'Finished one sample, adversarial is {}, '
                'cost time {:.2}s'.format(is_adv, time.time() - tick))

        return (np.array(success_flags),
                np.array(adversarials),
                np.array(query_counts))
Exemplo n.º 17
0
    def batch_generate(self, inputs, labels, batch_size=64):
        """
        Run `generate` batch by batch and join the adversarial examples.

        Args:
            inputs (numpy.ndarray): Samples based on which adversarial
                examples are generated.
            labels (numpy.ndarray): Labels of samples, whose values determined
                by specific attacks.
            batch_size (int): The number of samples in one batch.

        Returns:
            numpy.ndarray, generated adversarial examples

        Examples:
            >>> inputs = Tensor([[0.2, 0.4, 0.5, 0.2], [0.7, 0.2, 0.4, 0.3]])
            >>> labels = [3, 0]
            >>> advs = attack.batch_generate(inputs, labels, batch_size=2)
        """
        arr_x, arr_y = check_pair_numpy_param('inputs', inputs,
                                              'labels', labels)
        len_x = arr_x.shape[0]
        batch_size = check_int_positive('batch_size', batch_size)
        full_batches = int(len_x / batch_size)

        # Full-size windows first, then one open-ended window for leftovers.
        windows = [slice(k * batch_size, (k + 1) * batch_size)
                   for k in range(full_batches)]
        if len_x - full_batches * batch_size != 0:
            windows.append(slice(full_batches * batch_size, None))

        res = []
        for window in windows:
            outcome = self.generate(arr_x[window], arr_y[window])
            # Black-attack methods will return 3 values, just get the second.
            if isinstance(outcome, tuple):
                outcome = outcome[1]
            res.append(outcome)

        return np.concatenate(res, axis=0)
    def generate(self, inputs, labels):
        """
        Iteratively perturb the samples along the sign of a momentum term
        accumulated from the gradients.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.5, 0.2, 0.6],
            >>>                          [0.3, 0, 0.2]],
            >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            >>>                          [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        origin = inputs
        momentum = 0
        bounded = self._bounds is not None
        if bounded:
            clip_min, clip_max = self._bounds
            value_span = clip_max - clip_min

        for _ in range(self._nb_iter):
            gradient = self._gradient(inputs, labels)
            momentum = self._decay_factor * momentum + gradient
            adv_x = inputs + self._eps_iter * np.sign(momentum)
            if bounded:
                # Limit the total perturbation (scaled by the value range),
                # then keep the sample inside the valid range.
                delta = np.clip(adv_x - origin, (0 - self._eps) * value_span,
                                self._eps * value_span)
                adv_x = np.clip(origin + delta, clip_min, clip_max)
            else:
                adv_x = np.clip(adv_x, origin - self._eps, origin + self._eps)
            # The next iteration starts from the current adversarial sample.
            inputs = adv_x

        return adv_x
Exemplo n.º 19
0
    def generate(self, inputs, labels):
        """
        Optimise every sample separately and stack the results.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to create
                adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [2, 2])
        """
        LOGGER.debug(TAG, 'start to generate adv image.')
        arr_x, arr_y = check_pair_numpy_param('inputs', inputs, 'labels', labels)
        self._dtype = arr_x.dtype
        # One `_optimize` call per (sample, label) pair, all starting from
        # the configured epsilon.
        adversarials = [
            self._optimize(original_x, label_y, epsilon=self._eps)
            for original_x, label_y in zip(arr_x, arr_y)
        ]
        return np.array(adversarials)
Exemplo n.º 20
0
    def generate(self, inputs, labels):
        """
        Attack the samples one at a time and stack the results.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Target labels.

        Returns:
            numpy.ndarray, adversarial samples.

        Examples:
            >>> advs = generate([[0.2, 0.3, 0.4], [0.3, 0.4, 0.5]], [1, 2])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        if not self._sparse:
            # Turn one-hot rows into class indices.
            labels = np.argmax(labels, axis=1)
        LOGGER.debug(TAG, 'start to generate adversarial samples.')
        adversarials = []
        for position, sample in enumerate(inputs):
            adversarials.append(self._generate_one(sample, labels[position]))
        LOGGER.debug(TAG, 'finished.')
        return np.asarray(adversarials)
Exemplo n.º 21
0
def _best_logits_of_other_class(logits, target_class, value=1):
    """
    Find, per sample, the index of the largest logit after penalizing the
    target class.

    The logit at the target-class position of each row is reduced by `value`
    before the argmax is taken, so a large `value` (e.g. ``np.inf``)
    effectively excludes the target class.

    Args:
        logits (numpy.ndarray): Predict logits of samples.
        target_class (numpy.ndarray): Target labels.
        value (float): Amount subtracted from the target-class logit.
            Default: 1.

    Returns:
        numpy.ndarray, the index of the largest logits exclude the target
        class.

    Examples:
        >>> other_class = _best_logits_of_other_class([[0.2, 0.3, 0.5],
        >>> [0.3, 0.4, 0.3]], [2, 1])
    """
    LOGGER.debug(TAG, "enter the func _best_logits_of_other_class.")
    logits, target_class = check_pair_numpy_param('logits', logits,
                                                  'target_class', target_class)
    # Penalty mask: zero everywhere except at each row's target class.
    penalty = np.zeros_like(logits)
    for row_idx, penalty_row in enumerate(penalty):
        penalty_row[target_class[row_idx]] = value
    penalized = logits - penalty
    return np.argmax(penalized, axis=1)
Exemplo n.º 22
0
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and targeted
        labels (or ground_truth labels).

        Each sample is attacked independently with a particle-swarm search:
        a population of perturbed copies is initialized around the sample,
        moved by velocity updates (with optional mutation), scored with
        `self._fitness`, and the global best particle is tested for success
        after every iteration.

        Args:
            inputs (Union[numpy.ndarray, tuple]): Input samples. The format of inputs should be numpy.ndarray if
                model_type='classification'. The format of inputs can be (input1, input2, ...) or only one array if
                model_type='detection'.
            labels (Union[numpy.ndarray, tuple]): Targeted labels or ground-truth labels. The format of labels should
                be numpy.ndarray if model_type='classification'. The format of labels should be (gt_boxes, gt_labels)
                if model_type='detection'.

        Returns:
            - numpy.ndarray, bool values for each attack result.

            - numpy.ndarray, generated adversarial examples.

            - numpy.ndarray, query times for each sample.

        Raises:
            ValueError: If the attack is in sparse mode for classification
                and the labels are not in sparse style.

        Examples:
            >>> advs = attack.generate([[0.2, 0.3, 0.4], [0.3, 0.3, 0.2]],
            >>> [1, 2])
        """
        # inputs check
        # NOTE(review): if self._model_type is neither 'classification' nor
        # 'detection', `images` is never assigned and the loop below fails
        # with a NameError - presumably the constructor validates it; confirm.
        if self._model_type == 'classification':
            inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                    'labels', labels)
            if self._sparse:
                # Sparse labels must squeeze down to one index per sample.
                if labels.size > 1:
                    label_squ = np.squeeze(labels)
                else:
                    label_squ = labels
                if len(label_squ.shape) >= 2 or label_squ.shape[0] != inputs.shape[0]:
                    msg = "The parameter 'sparse' of PSOAttack is True, but the input labels is not sparse style and " \
                          "got its shape as {}.".format(labels.shape)
                    LOGGER.error(TAG, msg)
                    raise ValueError(msg)
            else:
                # Non-sparse labels are reduced to class indices.
                labels = np.argmax(labels, axis=1)
            images = inputs
        elif self._model_type == 'detection':
            images, auxiliary_inputs, gt_boxes, gt_labels = check_detection_inputs(inputs, labels)

        # generate one adversarial each time
        adv_list = []
        success_list = []
        query_times_list = []
        for i in range(images.shape[0]):
            is_success = False
            q_times = 0
            x_ori = images[i]
            # NOTE(review): when no bounds were configured they are derived
            # from the current sample and stored on the instance, so they
            # persist for all later samples and later calls - confirm intended.
            if not self._bounds:
                self._bounds = [np.min(x_ori), np.max(x_ori)]
            pixel_deep = self._bounds[1] - self._bounds[0]

            # One query is counted for scoring the original sample.
            q_times += 1
            if self._model_type == 'classification':
                label_i = labels[i]
                confi_ori = self._confidence_cla(x_ori, label_i)
            elif self._model_type == 'detection':
                # Add a leading batch axis to every per-sample auxiliary
                # input and ground-truth entry.
                auxiliary_input_i = tuple()
                for item in auxiliary_inputs:
                    auxiliary_input_i += (np.expand_dims(item[i], axis=0),)
                gt_boxes_i, gt_labels_i = np.expand_dims(gt_boxes[i], axis=0), np.expand_dims(gt_labels[i], axis=0)
                inputs_i = (images[i],) + auxiliary_input_i
                confi_ori, gt_object_num = self._detection_scores(inputs_i, gt_boxes_i, gt_labels_i, self._model)
                LOGGER.info(TAG, 'The number of ground-truth objects is %s', gt_object_num[0])

            # step1, initializing
            # initial global optimum fitness value; starts at -inf so that the
            # first evaluated particle always becomes the global best
            best_fitness = -np.inf
            # initial global optimum position
            best_position = x_ori
            # One copy of the sample per particle.
            x_copies = np.repeat(x_ori[np.newaxis, :], self._pop_size, axis=0)
            # Random noise scaled by the pixel range, then clipped to a band
            # proportional to |x| + 0.1 and the per-pixel bound ratio.
            cur_noise = np.clip(np.random.random(x_copies.shape)*pixel_deep,
                                (0 - self._per_bounds)*(np.abs(x_copies) + 0.1),
                                self._per_bounds*(np.abs(x_copies) + 0.1))

            # initial advs
            par = np.clip(x_copies + cur_noise, self._bounds[0], self._bounds[1])
            # initial optimum positions for particles
            par_best_poi = np.copy(par)
            # initial optimum fitness values
            par_best_fit = -np.inf*np.ones(self._pop_size)
            # step2, optimization
            # initial velocities for particles
            v_particles = np.zeros(par.shape)
            is_mutation = False
            iters = 0
            while iters < self._t_max:
                last_best_fit = best_fitness
                ran_1 = np.random.random(par.shape)
                ran_2 = np.random.random(par.shape)
                # NOTE(review): only the inertia and c1 terms are scaled by
                # step_size; the c2 term sits outside the parentheses - confirm
                # this weighting is intended.
                v_particles = self._step_size*(
                    v_particles + self._c1*ran_1*(best_position - par)) \
                              + self._c2*ran_2*(par_best_poi - par)

                # In-place move of all particles.
                par += v_particles

                # Mutation is only applied after iteration 6 and when the
                # previous iteration flagged stalled progress.
                if iters > 6 and is_mutation:
                    par = self._mutation_op(par)

                # Keep particles inside the per-pixel perturbation band around
                # the original sample, then inside the global value bounds.
                par = np.clip(np.clip(par,
                                      x_copies - (np.abs(x_copies) + 0.1*pixel_deep)*self._per_bounds,
                                      x_copies + (np.abs(x_copies) + 0.1*pixel_deep)*self._per_bounds),
                              self._bounds[0], self._bounds[1])

                if self._model_type == 'classification':
                    confi_adv = self._confidence_cla(par, label_i)
                elif self._model_type == 'detection':
                    confi_adv, _ = self._detection_scores(
                        (par,) + auxiliary_input_i, gt_boxes_i, gt_labels_i, self._model)
                # One query is counted per particle.
                q_times += self._pop_size
                fit_value = self._fitness(confi_ori, confi_adv, x_ori, par)
                for k in range(self._pop_size):
                    # Update the particle's personal best.
                    if fit_value[k] > par_best_fit[k]:
                        par_best_fit[k] = fit_value[k]
                        par_best_poi[k] = par[k]
                    # Update the global best; copied because `par` is later
                    # modified in place by `par += v_particles`.
                    if fit_value[k] > best_fitness:
                        best_fitness = fit_value[k]
                        best_position = par[k].copy()
                iters += 1
                if best_fitness < -2:
                    LOGGER.debug(TAG, 'best fitness value is %s, which is too small. We recommend that you decrease '
                                      'the value of the initialization parameter c.', best_fitness)
                if iters < 3 and best_fitness > 100:
                    LOGGER.debug(TAG, 'best fitness value is %s, which is too large. We recommend that you increase '
                                      'the value of the initialization parameter c.', best_fitness)
                # Request mutation for the next iteration when the gain in the
                # global best is below 5% of the previous best value.
                is_mutation = False
                if (best_fitness - last_best_fit) < last_best_fit*0.05:
                    is_mutation = True

                # One extra query for checking the current global best.
                q_times += 1
                if self._model_type == 'classification':
                    # NOTE(review): best_position is passed without an added
                    # batch axis - confirm the model's predict accepts that.
                    cur_pre = self._model.predict(best_position)
                    cur_label = np.argmax(cur_pre)
                    if (self._targeted and cur_label == label_i) or (not self._targeted and cur_label != label_i):
                        is_success = True
                elif self._model_type == 'detection':
                    _, correct_nums_adv = self._detection_scores(
                        (best_position,) + auxiliary_input_i, gt_boxes_i, gt_labels_i, self._model)
                    LOGGER.info(TAG, 'The number of correctly detected objects in adversarial image is %s',
                                correct_nums_adv[0])
                    # Success once the number of correctly detected objects
                    # falls to the reserved fraction of ground-truth objects.
                    if correct_nums_adv <= int(gt_object_num*self._reserve_ratio):
                        is_success = True

                if is_success:
                    LOGGER.debug(TAG, 'successfully find one adversarial '
                                      'sample and start Reduction process')
                    # step3, reduction
                    if self._model_type == 'classification':
                        best_position, q_times = self._reduction(x_ori, q_times, label_i, best_position, self._model,
                                                                 targeted_attack=self._targeted)

                    break
            # For detection, the fast reduction runs after the search loop
            # whether or not the attack succeeded.
            if self._model_type == 'detection':
                best_position, q_times = self._fast_reduction(x_ori, best_position, q_times,
                                                              auxiliary_input_i, gt_boxes_i, gt_labels_i, self._model)
            if not is_success:
                LOGGER.debug(TAG,
                             'fail to find adversarial sample, iteration '
                             'times is: %d and query times is: %d',
                             iters,
                             q_times)
            adv_list.append(best_position)
            success_list.append(is_success)
            query_times_list.append(q_times)
            # Drop the per-sample population arrays before the next sample.
            del x_copies, cur_noise, par, par_best_poi
        return np.asarray(success_list), \
               np.asarray(adv_list), \
               np.asarray(query_times_list)
Exemplo n.º 23
0
 def __init__(self, raw_preds, def_preds, true_labels):
     """
     Store validated predictions and labels for later evaluation.

     Args:
         raw_preds (numpy.ndarray): Predictions validated together with
             `def_preds` as a pair.
         def_preds (numpy.ndarray): Predictions validated together with
             `raw_preds` as a pair.
         true_labels (numpy.ndarray): Ground-truth labels of the samples.
     """
     checked_raw, checked_def = check_pair_numpy_param(
         'raw_preds', raw_preds, 'def_preds', def_preds)
     self._raw_preds = checked_raw
     self._def_preds = checked_def
     self._true_labels = check_numpy_param('true_labels', true_labels)
     # The sample count is taken from the labels as passed by the caller.
     sample_count = len(true_labels)
     self._num_samples = sample_count
Exemplo n.º 24
0
    def generate(self, inputs, labels):
        """
        Run a particle-swarm search per sample to build adversarial examples
        from input data and targeted labels (or ground_truth labels).

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Targeted labels or ground_truth labels.

        Returns:
            - numpy.ndarray, bool values for each attack result.

            - numpy.ndarray, generated adversarial examples.

            - numpy.ndarray, query times for each sample.

        Examples:
            >>> advs = attack.generate([[0.2, 0.3, 0.4], [0.3, 0.3, 0.2]],
            >>> [1, 2])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        # In targeted mode the given labels double as the target labels, so a
        # single per-sample reference label serves both modes.
        results_adv, results_ok, results_queries = [], [], []
        pixel_deep = self._bounds[1] - self._bounds[0]
        for idx in range(inputs.shape[0]):
            is_success = False
            q_times = 0
            x_ori = inputs[idx]
            # Score the clean sample (counts as one query).
            confidences = self._model.predict(np.expand_dims(x_ori, axis=0))[0]
            q_times += 1
            ref_label = labels[idx]
            if self._targeted:
                confi_ori = confidences[ref_label]
            else:
                confi_ori = max(confidences)

            # step1, initializing
            # global optimum starts at -inf fitness, positioned on the sample
            best_fitness = -np.inf
            best_position = x_ori
            x_copies = np.repeat(x_ori[np.newaxis, :], self._pop_size, axis=0)
            raw_noise = (np.random.random(x_copies.shape) - 0.5)*self._step_size
            noise_scale = x_copies + 0.1
            cur_noise = np.clip(raw_noise,
                                (0 - self._per_bounds)*noise_scale,
                                self._per_bounds*noise_scale)
            swarm = np.clip(x_copies + cur_noise,
                            x_copies*(1 - self._per_bounds),
                            x_copies*(1 + self._per_bounds))
            # snapshot of the initial particles, used for the clipping band
            swarm_init = np.copy(swarm)
            # per-particle optimum positions and fitness values
            personal_best_pos = np.copy(swarm)
            personal_best_fit = np.full(self._pop_size, -np.inf)

            # step2, optimization
            velocity = np.zeros(swarm.shape)
            band_center = swarm_init + 0.1*pixel_deep
            is_mutation = False
            iters = 0
            while iters < self._t_max:
                last_best_fit = best_fitness
                ran_1 = np.random.random(swarm.shape)
                ran_2 = np.random.random(swarm.shape)
                pull_global = self._c1*ran_1*(best_position - swarm)
                pull_personal = self._c2*ran_2*(personal_best_pos - swarm)
                velocity = self._step_size*(velocity + pull_global) \
                    + pull_personal
                swarm = np.clip(swarm + velocity,
                                band_center*(1 - self._per_bounds),
                                band_center*(1 + self._per_bounds))
                if iters > 30 and is_mutation:
                    swarm = self._mutation_op(swarm)

                # Evaluate the whole population (one query per particle).
                swarm_preds = self._model.predict(swarm)
                if self._targeted:
                    confi_adv = swarm_preds[:, ref_label]
                else:
                    confi_adv = np.max(swarm_preds, axis=1)
                q_times += self._pop_size
                fit_value = self._fitness(confi_ori, confi_adv, x_ori, swarm)
                for k in range(self._pop_size):
                    score = fit_value[k]
                    if score > personal_best_fit[k]:
                        personal_best_fit[k] = score
                        personal_best_pos[k] = swarm[k]
                    if score > best_fitness:
                        best_fitness = score
                        best_position = swarm[k]
                iters += 1

                # Check the current global best (one more query).
                cur_pre = self._model.predict(
                    np.expand_dims(best_position, axis=0))[0]
                # Flag stalled progress so a later iteration may mutate.
                is_mutation = \
                    (best_fitness - last_best_fit) < last_best_fit*0.05
                cur_label = np.argmax(cur_pre)
                q_times += 1
                if self._targeted:
                    reached_goal = cur_label == ref_label
                else:
                    reached_goal = cur_label != ref_label
                if reached_goal:
                    is_success = True
                    LOGGER.debug(TAG, 'successfully find one adversarial '
                                      'sample and start Reduction process')
                    # step3, reduction
                    best_position, q_times = self._reduction(
                        x_ori, q_times, ref_label, best_position)
                    break
            if not is_success:
                LOGGER.debug(TAG,
                             'fail to find adversarial sample, iteration '
                             'times is: %d and query times is: %d',
                             iters,
                             q_times)
            results_adv.append(best_position)
            results_ok.append(is_success)
            results_queries.append(q_times)
            del x_copies, cur_noise, swarm, swarm_init, personal_best_pos
        return np.asarray(results_ok), \
               np.asarray(results_adv), \
               np.asarray(results_queries)
Exemplo n.º 25
0
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and targeted labels.

        For every binary-search step, a fresh Adam optimizer minimizes the
        combined loss over a perturbation in attack space; the
        smallest-L2 adversarial example seen so far is kept per sample, and
        the per-sample trade-off constant is then updated from the search
        bounds.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): The ground truth label of input samples
                or target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> advs = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [1, 2])
        """

        LOGGER.debug(TAG, "enter the func generate.")
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                labels)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        self._dtype = inputs.dtype
        att_original = self._to_attack_space(inputs)
        reconstructed_original, _ = self._to_model_space(att_original)

        # find an adversarial sample
        # Per-sample trade-off constant and its binary-search bounds.
        const = np.ones_like(labels, dtype=self._dtype) * self._initial_const
        lower_bound = np.zeros_like(labels, dtype=self._dtype)
        upper_bound = np.ones_like(labels, dtype=self._dtype) * np.inf
        adversarial_res = inputs.copy()
        adversarial_loss = np.ones_like(labels, dtype=self._dtype) * np.inf
        samples_num = labels.shape[0]
        # NOTE(review): adv_flag is never reset between binary-search steps,
        # so once a sample succeeded every later step is treated as a success
        # for the bound update - confirm this is intended.
        adv_flag = np.zeros_like(labels)
        for binary_search_step in range(self._bin_search_steps):
            if (binary_search_step == self._bin_search_steps - 1) and \
                    (self._bin_search_steps >= 10):
                # On the last step use the upper bound (capped at 1e10) for
                # each sample. The element-wise minimum is required because
                # upper_bound is an array; the builtin min() cannot compare a
                # scalar with a multi-element array and would not keep
                # `const` indexable per sample.
                const = np.minimum(1e10, upper_bound)
            LOGGER.debug(TAG, 'starting optimization with const = %s',
                         str(const))

            att_perturbation = np.zeros_like(att_original, dtype=self._dtype)
            loss_at_previous_check = np.ones_like(labels,
                                                  dtype=self._dtype) * np.inf

            # create a new optimizer to minimize the perturbation
            optimizer = _AdamOptimizer(att_perturbation.shape)

            for iteration in range(self._max_iterations):
                x_input, dxdp = self._to_model_space(att_original +
                                                     att_perturbation)
                logits = self._network(Tensor(x_input)).asnumpy()

                current_l2_loss, current_loss, dldx = self._loss_function(
                    logits, x_input, reconstructed_original, labels, const,
                    self._confidence)

                # check if attack success (include all examples)
                if self._targeted:
                    is_adv = (np.argmax(logits, axis=1) == labels)
                else:
                    is_adv = (np.argmax(logits, axis=1) != labels)

                # Keep, per sample, the adversarial input with the smallest
                # L2 loss seen so far.
                for i in range(samples_num):
                    if is_adv[i]:
                        adv_flag[i] = True
                        if current_l2_loss[i] < adversarial_loss[i]:
                            adversarial_res[i] = x_input[i]
                            adversarial_loss[i] = current_l2_loss[i]

                # In fast mode, return as soon as every sample has an
                # adversarial example.
                if np.all(adv_flag):
                    if self._fast:
                        LOGGER.debug(TAG, "succeed find adversarial examples.")
                        msg = 'iteration: {}, logits_att: {}, ' \
                              'loss: {}, l2_dist: {}' \
                            .format(iteration,
                                    np.argmax(logits, axis=1),
                                    current_loss, current_l2_loss)
                        LOGGER.debug(TAG, msg)
                        return adversarial_res

                dldx, inputs = check_equal_shape('dldx', dldx, 'inputs',
                                                 inputs)

                # Chain rule: loss gradient w.r.t. the attack-space variable.
                gradient = dldx * dxdp
                att_perturbation += \
                    optimizer(gradient, self._learning_rate)

                # check if should stop iteration early
                flag = True
                iter_check = iteration % (np.ceil(
                    self._max_iterations * self._abort_early_check_ratio))
                if self._abort_early and iter_check == 0:
                    # check progress
                    for i in range(inputs.shape[0]):
                        if current_loss[i] <= .9999 * loss_at_previous_check[i]:
                            flag = False
                    # stop Adam if all samples has no progress
                    if flag:
                        LOGGER.debug(
                            TAG, 'step:%d, no progress yet, stop iteration',
                            binary_search_step)
                        break
                    loss_at_previous_check = current_loss

            for i in range(samples_num):
                # update bound based on search result
                if adv_flag[i]:
                    LOGGER.debug(
                        TAG, 'example %d, found adversarial with const=%f', i,
                        const[i])
                    upper_bound[i] = const[i]
                else:
                    LOGGER.debug(
                        TAG, 'example %d, failed to find adversarial'
                        ' with const=%f', i, const[i])
                    lower_bound[i] = const[i]

                # Grow the constant tenfold until an upper bound exists, then
                # bisect between the bounds.
                if upper_bound[i] == np.inf:
                    const[i] *= 10
                else:
                    const[i] = (lower_bound[i] + upper_bound[i]) / 2

        return adversarial_res
Exemplo n.º 26
0
    def _loss_function(self, logits, new_x, org_x, org_or_target_class,
                       constant, confidence):
        """
        Evaluate the attack objective and its gradient w.r.t. the inputs.

        The objective is the squared L2 distance between `new_x` and `org_x`
        plus `constant` times a hinge term on the logit margin between the
        given class and the best other class.

        Args:
            logits (numpy.ndarray): The output of network before softmax.
            new_x (numpy.ndarray): Adversarial examples.
            org_x (numpy.ndarray): Original benign input samples.
            org_or_target_class (numpy.ndarray): Original/target labels.
            constant (numpy.ndarray): Per-sample trade-off constants used to
                balance loss and perturbation norm (indexed per sample).
            confidence (float): Confidence level of the output of adversarial
                examples.

        Returns:
            numpy.ndarray, norm of perturbation, sum of the loss and the
            norm, and gradients of the sum w.r.t inputs.

        Raises:
            ValueError: If loss is less than 0.

        Examples:
            >>> L2_loss, total_loss, dldx = self._loss_function([0.2 , 0.3,
            >>> 0.5], [0.1, 0.2, 0.2, 0.4], [0.12, 0.2, 0.25, 0.4], [1], 2, 0)
        """
        LOGGER.debug(TAG, "enter the func _loss_function.")

        logits = check_numpy_param('logits', logits)
        org_x = check_numpy_param('org_x', org_x)
        new_x, org_or_target_class = check_pair_numpy_param(
            'new_x', new_x, 'org_or_target_class', org_or_target_class)

        new_x, org_x = check_equal_shape('new_x', new_x, 'org_x', org_x)

        # Strongest competing class per sample (given class fully excluded).
        rival_class = _best_logits_of_other_class(logits,
                                                  org_or_target_class,
                                                  value=np.inf)
        delta = new_x - org_x
        non_batch_axes = tuple(range(len(new_x.shape))[1:])
        l2_term = np.sum(delta**2, axis=non_batch_axes)
        hinge = np.zeros_like(l2_term, dtype=self._dtype)
        hinge_grad = np.zeros_like(new_x, dtype=self._dtype)
        jacobian = jacobian_matrix(self._net_grad, new_x, self._num_classes)
        sample_count = org_or_target_class.shape[0]

        for idx in range(sample_count):
            given_cls = org_or_target_class[idx]
            other_cls = rival_class[idx]
            # Targeted: push the rival below the target.
            # Untargeted: push the original class below the rival.
            if self._targeted:
                plus_cls, minus_cls = other_cls, given_cls
            else:
                plus_cls, minus_cls = given_cls, other_cls
            margin = logits[idx][plus_cls] - logits[idx][minus_cls] + confidence
            hinge[idx] = max(0, margin)
            hinge_grad[idx] = constant[idx] * (
                jacobian[plus_cls][idx] - jacobian[minus_cls][idx])

        total_loss = l2_term + constant * hinge
        l2_grad = 2 * delta
        for idx in range(sample_count):
            hinge_value = hinge[idx]
            if hinge_value < 0:
                msg = 'loss value should greater than or equal to 0, ' \
                      'but got loss2 {}'.format(hinge_value)
                LOGGER.error(TAG, msg)
                raise ValueError(msg)
            # An inactive hinge contributes no gradient.
            if hinge_value == 0:
                hinge_grad[idx, ...] = 0
        return l2_term, total_loss, l2_grad + hinge_grad
Exemplo n.º 27
0
    def batch_generate(self, inputs, labels, batch_size=64):
        """
        Generate adversarial examples in batch, based on input samples and
        their labels.

        The inputs are cut into consecutive slices of at most `batch_size`
        samples (the last slice may be shorter), `self.generate` is called on
        each slice, and the results are concatenated along axis 0.

        Args:
            inputs (numpy.ndarray): Samples based on which adversarial
                examples are generated.
            labels (Union[numpy.ndarray, tuple]): Original/target labels. \
                For each input if it has more than one label, it is wrapped in a tuple.
            batch_size (int): The number of samples in one batch.

        Returns:
            numpy.ndarray, generated adversarial examples

        Examples:
            >>> inputs = np.array([[0.2, 0.4, 0.5, 0.2], [0.7, 0.2, 0.4, 0.3]])
            >>> labels = np.array([3, 0])
            >>> advs = attack.batch_generate(inputs, labels, batch_size=2)
        """
        # Validate inputs against every label array; only the validated
        # inputs are kept, labels are sliced as passed by the caller.
        if isinstance(labels, tuple):
            for i, labels_item in enumerate(labels):
                arr_x, _ = check_pair_numpy_param('inputs', inputs, \
                    'labels[{}]'.format(i), labels_item)
        else:
            arr_x, _ = check_pair_numpy_param('inputs', inputs, \
                'labels', labels)
        arr_y = labels
        len_x = arr_x.shape[0]
        batch_size = check_int_positive('batch_size', batch_size)

        res = []
        # A single stepped loop covers both the full batches and the final
        # partial batch, so tuple labels are sliced per sub-array in every
        # batch (the remainder batch used to slice the tuple itself).
        for start in range(0, len_x, batch_size):
            stop = start + batch_size
            x_batch = arr_x[start:stop]
            if isinstance(arr_y, tuple):
                y_batch = tuple(sub_labels[start:stop] for sub_labels in arr_y)
            else:
                y_batch = arr_y[start:stop]
            adv_x = self.generate(x_batch, y_batch)
            # Black-attack methods will return 3 values, just get the second.
            res.append(adv_x[1] if isinstance(adv_x, tuple) else adv_x)

        return np.concatenate(res, axis=0)
Exemplo n.º 28
0
    def generate(self, inputs, labels):
        """
        Produce adversarial examples from input samples and targeted labels.

        Starting points are obtained from `_initialize_starting_point`; each
        sample whose starting point is flagged as adversarial is refined with
        `_decision_optimize`, the others are returned as their starting point
        and marked as failed.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to create adversarial examples.
            labels (numpy.ndarray): For targeted attack, labels are adversarial target labels.
                For untargeted attack, labels are ground-truth labels.

        Returns:
            - numpy.ndarray, bool values for each attack result.

            - numpy.ndarray, generated adversarial examples.

            - numpy.ndarray, query times for each sample.

        Examples:
            >>> is_adv_list, adv_list, query_times_each_adv = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [2, 3])
        """
        arr_x, arr_y = check_pair_numpy_param('inputs', inputs, 'labels',
                                              labels)
        if not self._sparse:
            arr_y = np.argmax(arr_y, axis=1)

        def _predicted_class(single_sample):
            # Class index the model assigns to one sample (batch axis added).
            batch = np.expand_dims(single_sample, axis=0)
            return np.argmax(self._model.predict(batch)[0])

        ini_bool, ini_advs, ini_count = self._initialize_starting_point(
            arr_x, arr_y)
        flags, advs, queries = [], [], []
        per_sample = zip(arr_x, arr_y, ini_advs, ini_bool, ini_count)
        for sample, sample_label, start_adv, ite_bool, ite_c in per_sample:
            if not ite_bool:
                # No adversarial starting point: report the start as-is.
                LOGGER.info(TAG, 'Initial sample is not adversarial, pass.')
                flags.append(False)
                advs.append(start_adv)
                queries.append(ite_c)
                continue

            LOGGER.info(TAG, 'Start optimizing.')
            ori_label = _predicted_class(sample)
            ini_label = _predicted_class(start_adv)
            is_adv, adv_x, query_times = self._decision_optimize(
                sample, sample_label, start_adv)
            adv_label = _predicted_class(adv_x)
            LOGGER.info(TAG,
                        'before ini attack label is :{}'.format(ori_label))
            LOGGER.info(TAG,
                        'after ini attack label is :{}'.format(ini_label))
            LOGGER.info(TAG,
                        'INPUT optimize label is :{}'.format(sample_label))
            LOGGER.info(
                TAG,
                'after pointwise attack label is :{}'.format(adv_label))
            flags.append(is_adv)
            advs.append(adv_x)
            # Total queries = optimization queries + initialization queries.
            queries.append(query_times + ite_c)

        is_adv_list = np.array(flags)
        adv_list = np.array(advs)
        query_times_each_adv = np.array(queries)
        LOGGER.info(TAG, 'ret list is: {}'.format(adv_list))
        return is_adv_list, adv_list, query_times_each_adv
Exemplo n.º 29
0
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input samples and original labels.

        In every round the network is linearized around the current samples.
        For each sample that still keeps its original label, the closest
        linearized decision boundary is selected and the step towards it is
        added to that sample's accumulated perturbation. The loop ends when no
        sample keeps its original label or after `max_iters` rounds.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Original labels. When `sparse` is False
                they are reduced to class indices with an argmax over axis 1.

        Returns:
            numpy.ndarray, adversarial examples.

        Raises:
            NotImplementedError: If norm_level is not in [2, np.inf, '2', 'inf'].

        Examples:
            >>> advs = generate([[0.2, 0.3, 0.4], [0.3, 0.4, 0.5]], [1, 2])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                labels)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        inputs_dtype = inputs.dtype
        iteration = 0
        origin_labels = labels
        cur_labels = origin_labels.copy()
        # `weight` is deliberately defined outside the loops: it keeps the
        # direction of the closest boundary found for the current sample.
        weight = np.squeeze(np.zeros(inputs.shape[1:]))
        # Accumulated perturbation of every sample, relative to x_origin.
        r_tot = np.zeros(inputs.shape)
        x_origin = inputs
        # The norm level does not change while attacking, so decide once.
        use_l2 = self._norm_level == 2 or self._norm_level == '2'
        use_linf = self._norm_level == np.inf or self._norm_level == 'inf'
        while np.any(
                cur_labels == origin_labels) and iteration < self._max_iters:
            preds = self._network(Tensor(inputs)).asnumpy()
            grads = jacobian_matrix(self._net_grad, inputs, self._num_classes)
            for idx in range(inputs.shape[0]):
                diff_w = np.inf
                label = origin_labels[idx]
                # Samples that already changed label are left untouched.
                if cur_labels[idx] != label:
                    continue
                for k in range(self._num_classes):
                    if k == label:
                        continue
                    w_k = grads[k, idx, ...] - grads[label, idx, ...]
                    f_k = preds[idx, k] - preds[idx, label]
                    if use_l2:
                        diff_w_k = abs(f_k) / (np.linalg.norm(w_k) + 1e-8)
                    elif use_linf:
                        # Flatten first: with ord=1, np.linalg.norm treats a
                        # 2-D array as a matrix (max column sum) and rejects
                        # arrays with more than two dimensions, whereas the
                        # vector L1 norm over all features is needed here.
                        diff_w_k = abs(f_k) / (
                            np.linalg.norm(np.ravel(w_k), ord=1) + 1e-8)
                    else:
                        msg = 'ord {} is not available.' \
                            .format(str(self._norm_level))
                        LOGGER.error(TAG, msg)
                        raise NotImplementedError(msg)
                    # Keep the class whose linearized boundary is closest.
                    if diff_w_k < diff_w:
                        diff_w = diff_w_k
                        weight = w_k

                if use_l2:
                    r_i = diff_w * weight / (np.linalg.norm(weight) + 1e-8)
                elif use_linf:
                    # NOTE(review): the closed-form L-inf step onto a linear
                    # boundary is |f| * sign(w) / ||w||_1, and diff_w already
                    # contains the division by ||w||_1; the second division
                    # below is kept from the original code - confirm it is
                    # intended.
                    r_i = diff_w*np.sign(weight) \
                          / (np.linalg.norm(np.ravel(weight), ord=1) + 1e-8)
                else:
                    msg = 'ord {} is not available in normalization.' \
                        .format(str(self._norm_level))
                    LOGGER.error(TAG, msg)
                    raise NotImplementedError(msg)
                r_tot[idx, ...] = r_tot[idx, ...] + r_i

            if self._bounds is not None:
                clip_min, clip_max = self._bounds
                # The perturbation is scaled by the value range before being
                # applied, then the result is clipped back into the range.
                inputs = x_origin + (1 + self._overshoot) * r_tot * (clip_max -
                                                                     clip_min)
                inputs = np.clip(inputs, clip_min, clip_max)
            else:
                inputs = x_origin + (1 + self._overshoot) * r_tot
            cur_labels = np.argmax(self._network(
                Tensor(inputs.astype(inputs_dtype))).asnumpy(),
                                   axis=1)
            iteration += 1
            # r_tot is float64, so restore the caller's dtype every round.
            inputs = inputs.astype(inputs_dtype)
            del preds, grads
        return inputs
Exemplo n.º 30
0
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and targeted
        labels (or ground_truth labels).

        Every sample is attacked on its own: a population of `pop_size`
        perturbed copies is evolved for at most `max_steps` steps through
        fitness-proportional parent selection, crossover and mutation, while
        the fittest individual of each step is carried over unchanged.

        Args:
            inputs (Union[numpy.ndarray, tuple]): Input samples. The format of inputs should be numpy.ndarray if
                model_type='classification'. The format of inputs can be (input1, input2, ...) or only one array if
                model_type='detection'.
            labels (Union[numpy.ndarray, tuple]): Targeted labels or ground-truth labels. The format of labels should
                be numpy.ndarray if model_type='classification'. The format of labels should be (gt_boxes, gt_labels)
                if model_type='detection'.

        Returns:
            - numpy.ndarray, bool values for each attack result.

            - numpy.ndarray, generated adversarial examples.

            - numpy.ndarray, query times for each sample.

        Raises:
            ValueError: If model_type='classification', `sparse` is True and
                the labels do not reduce to one class index per input sample.

        Examples:
            >>> advs = attack.generate([[0.2, 0.3, 0.4],
            >>>                         [0.3, 0.3, 0.2]],
            >>>                        [1, 2])
        """
        if self._model_type == 'classification':
            inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                    'labels', labels)
            if self._sparse:
                # Sparse labels must squeeze down to one index per sample.
                if labels.size > 1:
                    label_squ = np.squeeze(labels)
                else:
                    label_squ = labels
                if len(label_squ.shape) >= 2 or label_squ.shape[0] != inputs.shape[0]:
                    msg = "The parameter 'sparse' of GeneticAttack is True, but the input labels is not sparse style " \
                          "and got its shape as {}.".format(labels.shape)
                    LOGGER.error(TAG, msg)
                    raise ValueError(msg)
            else:
                labels = np.argmax(labels, axis=1)
            images = inputs
        elif self._model_type == 'detection':
            images, auxiliary_inputs, gt_boxes, gt_labels = check_detection_inputs(inputs, labels)

        adv_list = []
        success_list = []
        query_times_list = []
        for i in range(images.shape[0]):
            is_success = False
            x_ori = images[i]
            # NOTE(review): bounds inferred here come from the first sample
            # that reaches this line and stay on the instance afterwards, so
            # later samples and later calls reuse them - confirm this is
            # intended.
            if not self._bounds:
                self._bounds = [np.min(x_ori), np.max(x_ori)]
            pixel_deep = self._bounds[1] - self._bounds[0]

            if self._model_type == 'classification':
                label_i = labels[i]
            elif self._model_type == 'detection':
                # Slice the auxiliary inputs and ground truth of sample i,
                # keeping a leading axis of length one.
                auxiliary_input_i = tuple()
                for item in auxiliary_inputs:
                    auxiliary_input_i += (np.expand_dims(item[i], axis=0),)
                gt_boxes_i, gt_labels_i = np.expand_dims(gt_boxes[i], axis=0), np.expand_dims(gt_labels[i], axis=0)
                inputs_i = (images[i],) + auxiliary_input_i
                confi_ori, gt_object_num = self._detection_scores(inputs_i, gt_boxes_i, gt_labels_i, model=self._model)
                LOGGER.info(TAG, 'The number of ground-truth objects is %s', gt_object_num[0])

            # generate particles
            ori_copies = np.repeat(x_ori[np.newaxis, :], self._pop_size, axis=0)
            # initial perturbations
            cur_pert = np.random.uniform(self._bounds[0], self._bounds[1], ori_copies.shape)
            cur_pop = ori_copies + cur_pert
            query_times = 0
            iters = 0
            # Fallback result when the search loop below never runs (for
            # example max_steps == 0); every unsuccessful step overwrites it
            # with the fittest individual of the current population.
            elite = x_ori

            while iters < self._max_steps:
                iters += 1
                # Keep every individual within the allowed distance from the
                # original sample and inside the valid value range.
                cur_pop = np.clip(np.clip(cur_pop,
                                          ori_copies - pixel_deep*self._per_bounds,
                                          ori_copies + pixel_deep*self._per_bounds),
                                  self._bounds[0], self._bounds[1])

                if self._model_type == 'classification':
                    pop_preds = self._model.predict(cur_pop)
                    query_times += cur_pop.shape[0]
                    all_preds = np.argmax(pop_preds, axis=1)
                    if self._targeted:
                        success_pop = np.equal(label_i, all_preds).astype(np.int32)
                    else:
                        success_pop = np.not_equal(label_i, all_preds).astype(np.int32)
                    # NOTE(review): is_success is the NumPy integer 0/1 here,
                    # not a bool, so the returned success array has an integer
                    # dtype for classification - confirm callers accept that.
                    is_success = max(success_pop)
                    best_idx = np.argmax(success_pop)
                    target_preds = pop_preds[:, label_i]
                    others_preds_sum = np.sum(pop_preds, axis=1) - target_preds
                    # Fitness rewards the score of label_i for targeted
                    # attacks and the score of all other classes otherwise.
                    if self._targeted:
                        fit_vals = target_preds - others_preds_sum
                    else:
                        fit_vals = others_preds_sum - target_preds

                elif self._model_type == 'detection':
                    confi_adv, correct_nums_adv = self._detection_scores(
                        (cur_pop,) + auxiliary_input_i, gt_boxes_i, gt_labels_i, model=self._model)
                    LOGGER.info(TAG, 'The number of correctly detected objects in adversarial image is %s',
                                np.min(correct_nums_adv))
                    query_times += self._pop_size
                    # Fitness is the confidence change penalised by the L2
                    # size of the perturbation, weighted by c / pop_size.
                    fit_vals = abs(
                        confi_ori - confi_adv) - self._c / self._pop_size * np.linalg.norm(
                            (cur_pop - x_ori).reshape(cur_pop.shape[0], -1), axis=1)
                    top_fit = np.max(fit_vals)

                    # The halved penalty weight is stored on the instance and
                    # therefore also applies to later steps and samples.
                    if top_fit < 0:
                        self._c /= 2

                    if top_fit < -2:
                        LOGGER.debug(TAG,
                                     'best fitness value is %s, which is too small. We recommend that you decrease '
                                     'the value of the initialization parameter c.', top_fit)
                    if iters < 3 and top_fit > 100:
                        LOGGER.debug(TAG,
                                     'best fitness value is %s, which is too large. We recommend that you increase '
                                     'the value of the initialization parameter c.', top_fit)

                    if np.min(correct_nums_adv) <= int(gt_object_num*self._reserve_ratio):
                        is_success = True
                        best_idx = np.argmin(correct_nums_adv)

                if is_success:
                    LOGGER.debug(TAG, 'successfully find one adversarial sample '
                                      'and start Reduction process.')
                    final_adv = cur_pop[best_idx]
                    if self._model_type == 'classification':
                        final_adv, query_times = self._reduction(x_ori, query_times, label_i, final_adv,
                                                                 model=self._model, targeted_attack=self._targeted)
                    break

                best_fit = max(fit_vals)

                # NOTE(review): _best_fit, _plateau_times and _adap_times live
                # on the instance and are not reset in this method, so they
                # carry over between samples - confirm this is intended.
                if best_fit > self._best_fit:
                    self._best_fit = best_fit
                    self._plateau_times = 0
                else:
                    self._plateau_times += 1
                adap_threshold = 100 if best_fit > -0.4 else 300
                if self._plateau_times > adap_threshold:
                    self._adap_times += 1
                    self._plateau_times = 0
                if self._adaptive:
                    # Decay both rates with every adaptation, but never below
                    # the configured values.
                    step_noise = max(self._step_size, 0.4*(0.9**self._adap_times))
                    step_p = max(self._mutation_rate, 0.5*(0.9**self._adap_times))
                else:
                    step_noise = self._step_size
                    step_p = self._mutation_rate
                step_temp = self._temp
                elite = cur_pop[np.argmax(fit_vals)]
                select_probs = softmax(fit_vals/step_temp)
                select_args = np.arange(self._pop_size)
                # One slot of the next population is reserved for the elite,
                # so pop_size - 1 children (two parents each) are produced.
                num_children = self._pop_size - 1
                parents_arg = np.random.choice(
                    a=select_args, size=2*num_children,
                    replace=True, p=select_probs)
                parent1 = cur_pop[parents_arg[:num_children]]
                parent2 = cur_pop[parents_arg[num_children:]]
                parent1_probs = select_probs[parents_arg[:num_children]]
                parent2_probs = select_probs[parents_arg[num_children:]]
                parent2_probs = parent2_probs / (parent1_probs + parent2_probs)
                # Add one trailing axis per feature dimension so that the
                # per-child probability broadcasts over all features.
                parent2_probs = parent2_probs.reshape(
                    parent2_probs.shape + (1,)*len(x_ori.shape))
                # 1 takes the feature from parent1, 0 takes it from parent2.
                cross_probs = (np.random.random(parent1.shape) >
                               parent2_probs).astype(np.int32)
                childs = parent1*cross_probs + parent2*(1 - cross_probs)
                mutated_childs = self._mutation(
                    childs, step_noise=self._per_bounds*step_noise,
                    prob=step_p)
                cur_pop = np.concatenate((mutated_childs, elite[np.newaxis, :]))

            if not is_success:
                LOGGER.debug(TAG, 'fail to find adversarial sample.')
                final_adv = elite
            if self._model_type == 'detection':
                final_adv, query_times = self._fast_reduction(
                    x_ori, final_adv, query_times, auxiliary_input_i, gt_boxes_i, gt_labels_i, model=self._model)
            adv_list.append(final_adv)

            LOGGER.debug(TAG,
                         'iteration times is: %d and query times is: %d',
                         iters,
                         query_times)
            success_list.append(is_success)
            query_times_list.append(query_times)
            del ori_copies, cur_pert, cur_pop
        return np.asarray(success_list), \
               np.asarray(adv_list), \
               np.asarray(query_times_list)