def __call__(
    self, model: Model, inputs: T, criterion: Union[Misclassification, T]
) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        classes = criterion_.labels
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(
            f"expected labels to have shape ({N},), got {classes.shape}"
        )

    min_, max_ = model.bounds

    x_l2_norm = flatten(x.square()).sum(1)

    def loss_fun(x: ep.Tensor) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        logits = model(x)
        scores = ep.softmax(logits)
        pred_scores = scores[range(N), classes]
        loss = pred_scores.sum()
        return loss, (scores, pred_scores)

    for i in range(self.steps):
        # (1) get the scores and gradients
        _, (scores, pred_scores), gradients = ep.value_aux_and_grad(loss_fun, x)

        pred = scores.argmax(-1)
        num_classes = scores.shape[-1]

        # (2) calculate gradient norm
        gradients_l2_norm = flatten(gradients.square()).sum(1)

        # (3) calculate delta
        a = self.stepsize * x_l2_norm * gradients_l2_norm
        b = pred_scores - 1.0 / num_classes

        delta = ep.minimum(a, b)

        # (4) stop the attack if an adversarial example has been found
        # this is not described in the paper, but otherwise, once the prob.
        # drops below chance level, the likelihood is not decreased but increased
        is_not_adversarial = (pred == classes).float32()
        delta *= is_not_adversarial

        # (5) calculate & apply current perturbation
        a = atleast_kd(delta / gradients_l2_norm.square(), gradients.ndim)
        x -= a * gradients

        x = ep.clip(x, min_, max_)

    return restore_type(x)
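
# ---------------------------------------------------------------------------
# Usage sketch (not part of the excerpt above, and hedged accordingly): it
# assumes the __call__ defined above belongs to a Foolbox-style attack class,
# here assumed to be named NewtonFoolAttack with `steps` and `stepsize` set in
# __init__, and that a PyTorch model is wrapped with Foolbox's PyTorchModel.
# `net`, `fmodel`, `images`, and `labels` are placeholder names.
# ---------------------------------------------------------------------------
import torch
import torchvision.models as tv_models
import foolbox as fb

net = tv_models.resnet18(pretrained=True).eval()
fmodel = fb.PyTorchModel(net, bounds=(0, 1))

images = torch.rand(8, 3, 224, 224)       # placeholder inputs in [0, 1]
labels = net(images).argmax(-1)           # placeholder labels to misclassify

attack = NewtonFoolAttack(steps=100, stepsize=0.01)  # assumed class name
adversarials = attack(fmodel, images, labels)        # labels -> Misclassification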
def test_value_aux_and_grad(dummy: Tensor) -> None:
    # the NumPy backend has no automatic differentiation, so skip it
    if isinstance(dummy, ep.NumPyTensor):
        pytest.skip()

    def f(x: Tensor) -> Tuple[Tensor, Tensor]:
        x = x.square()
        return x.sum(), x

    t = ep.arange(dummy, 8).float32().reshape((2, 4))
    v, aux, g = ep.value_aux_and_grad(f, t)
    assert v.item() == 140
    assert (aux == t.square()).all()
    assert (g == 2 * t).all()
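
# ---------------------------------------------------------------------------
# Standalone sketch (assumption: a PyTorch-backed EagerPy tensor, since the
# NumPy backend skipped above has no autograd): value_aux_and_grad evaluates f
# once and returns the scalar value, the auxiliary output, and the gradient of
# the value w.r.t. the input -- the same call pattern used in step (1) of the
# attack loop above.
# ---------------------------------------------------------------------------
import torch
import eagerpy as ep


def f(x: ep.Tensor):
    sq = x.square()
    return sq.sum(), sq  # (scalar value to differentiate, auxiliary output)


t = ep.astensor(torch.arange(8, dtype=torch.float32).reshape(2, 4))
value, aux, grad = ep.value_aux_and_grad(f, t)

print(value.item())            # 140.0
print((grad == 2 * t).all())   # gradient of sum(x^2) is 2x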