def test_fast_gradient_sign_method():
    """
    Fast gradient sign method unit test.

    Generates an adversarial example from a single benign sample and checks
    that the attack actually perturbed the input.
    """
    benign_sample = np.asarray([[0.1, 0.2, 0.7]], np.float32)
    gt_class = np.asarray([2], np.int32)
    # One-hot encode the ground-truth class for the 3-class toy network.
    one_hot_label = np.eye(3)[gt_class].astype(np.float32)

    attack = FastGradientSignMethod(Net())
    adv_sample = attack.generate(benign_sample, one_hot_label)

    assert np.any(adv_sample != benign_sample), \
        'Fast gradient sign method: generate value must not be equal to' \
        ' original value.'
class BasicIterativeMethod(IterativeGradientMethod):
    """
    The Basic Iterative Method attack, an iterative FGSM method to generate
    adversarial examples.

    References: `A. Kurakin, I. Goodfellow, and S. Bengio, "Adversarial examples
    in the physical world," in ICLR, 2017 <https://arxiv.org/abs/1607.02533>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of adversarial perturbation generated by the
            attack to data range. Default: 0.3.
        eps_iter (float): Proportion of single-step adversarial perturbation
            generated by the attack to data range. Default: 0.1.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        nb_iter (int): Number of iteration. Default: 5.
        loss_fn (Loss): Loss function for optimization. Default: None.
        attack (class): The single step gradient method of each iteration. In
            this class, FGSM is used.

    Examples:
        >>> attack = BasicIterativeMethod(network)
    """

    def __init__(self, network, eps=0.3, eps_iter=0.1, bounds=(0.0, 1.0),
                 is_targeted=False, nb_iter=5, loss_fn=None):
        super(BasicIterativeMethod, self).__init__(network,
                                                   eps=eps,
                                                   eps_iter=eps_iter,
                                                   bounds=bounds,
                                                   nb_iter=nb_iter,
                                                   loss_fn=loss_fn)
        self._is_targeted = check_param_type('is_targeted', is_targeted, bool)
        # Each iteration takes one FGSM step of size eps_iter; the accumulated
        # perturbation is clipped back into the eps-ball in generate().
        self._attack = FastGradientSignMethod(self._network,
                                              eps=self._eps_iter,
                                              bounds=self._bounds,
                                              is_targeted=self._is_targeted,
                                              loss_fn=loss_fn)

    def generate(self, inputs, labels):
        """
        Simple iterative FGSM method to generate adversarial examples.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.3, 0.2, 0.6],
            >>>                          [0.3, 0.2, 0.4]],
            >>>                         [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            >>>                          [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        arr_x = inputs
        # Robustness: guarantee a defined return value even if nb_iter == 0
        # (the original left adv_x unbound in that case).
        adv_x = inputs
        # BUG FIX: the original tested `'self.prob' in globals()`, which checks
        # for a global variable literally named "self.prob" — impossible as an
        # identifier, so it was always False and the input-transform branch was
        # dead code. The evident intent is an attribute-presence check on self.
        use_transform = hasattr(self, 'prob')
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            clip_diff = clip_max - clip_min
            for _ in range(self._nb_iter):
                # Optionally diversify inputs before each step (presumably a
                # probabilistic input transform — confirm _transform_inputs
                # semantics against its definition).
                if use_transform:
                    d_inputs = _transform_inputs(inputs, self.prob)
                else:
                    d_inputs = inputs
                adv_x = self._attack.generate(d_inputs, labels)
                # Project the accumulated perturbation back into the eps-ball
                # (scaled by the data range) around the original samples.
                perturs = np.clip(adv_x - arr_x, (0 - self._eps)*clip_diff,
                                  self._eps*clip_diff)
                adv_x = arr_x + perturs
                inputs = adv_x
        else:
            for _ in range(self._nb_iter):
                if use_transform:
                    d_inputs = _transform_inputs(inputs, self.prob)
                else:
                    d_inputs = inputs
                adv_x = self._attack.generate(d_inputs, labels)
                # No data bounds: clip directly to the unscaled eps-ball.
                adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
                inputs = adv_x
        return adv_x