def clip_perturbation(self, references: T, perturbed: T, epsilon: float) -> T:
    """Limits the Lp norm of each perturbation to at most epsilon.

    The perturbation is ``perturbed - references``. For ``self.p == inf`` it
    is clipped element-wise to ``[-epsilon, epsilon]``. For every other norm
    it is multiplied by ``min(1, epsilon / norm)``, so it can shrink but is
    never enlarged. Shrinking an L0 perturbation is not implemented.

    Args:
        references: A batch of reference inputs.
        perturbed: A batch of perturbed inputs.
        epsilon: The largest norm the perturbation may have after clipping.

    Returns:
        A tensor like perturbed but with the perturbation clipped to epsilon.
        For ``self.p == 0`` the ``perturbed`` argument itself is returned
        when no perturbation needs to be reduced.

    Raises:
        NotImplementedError: If ``self.p == 0`` and at least one perturbation
            would have to be reduced.
    """
    (origin, moved), restore_type = ep.astensors_(references, perturbed)
    delta = moved - origin

    if self.p == ep.inf:
        # The Linf ball is a box, so clipping each element is sufficient.
        return restore_type(origin + ep.clip(delta, -epsilon, epsilon))

    # The lower bound on the norms avoids a division by zero.
    lengths = ep.maximum(ep.norms.lp(flatten(delta), self.p, axis=-1), 1e-12)
    # Clipping may decrease a perturbation but must never increase it.
    scale = ep.minimum(1, epsilon / lengths)

    if self.p == 0:
        if not (scale == 1).all():
            raise NotImplementedError("reducing L0 norms not yet supported")
        return perturbed

    # Expand the per-row scale so it broadcasts against the perturbation.
    return restore_type(origin + atleast_kd(scale, origin.ndim) * delta)
def l2_clipping_aware_rescaling(x, delta, eps: float, a: float = 0.0, b: float = 1.0):  # type: ignore
    """Calculates eta such that norm(clip(x + eta * delta, a, b) - x) == eps.

    Assumes x and delta have a batch dimension and eps, a, and b are
    scalars. If the equation cannot be solved because eps is too large, the
    left hand side is maximized.

    Args:
        x: A batch of inputs (PyTorch Tensor, TensorFlow Eager Tensor, NumPy
            Array, JAX Array, or EagerPy Tensor).
        delta: A batch of perturbation directions (same shape and type as x).
        eps: The target norm (non-negative float).
        a: The lower bound of the data domain (float).
        b: The upper bound of the data domain (float).

    Returns:
        eta: A batch of scales with the same number of dimensions as x but
            with every axis of size 1 except for the batch dimension.
    """
    (x, delta), restore_fn = ep.astensors_(x, delta)
    # The first axis is treated as the batch axis.
    N = x.shape[0]
    assert delta.shape[0] == N
    # Row indices used for the per-row gathers below.
    rows = ep.arange(x, N)

    # Squared direction components, flattened to one row per sample.
    delta2 = delta.square().reshape((N, -1))
    # Room left until the bound that delta points towards: b for
    # non-negative components, a for negative ones.
    space = ep.where(delta >= 0, b - x, x - a).reshape((N, -1))
    # Squared scale at which each component reaches its bound; the floor on
    # delta2 avoids a division by zero for components that do not move.
    f2 = space.square() / ep.maximum(delta2, 1e-20)
    # Sort these thresholds per row.
    ks = ep.argsort(f2, axis=-1)
    f2_sorted = f2[rows[:, ep.newaxis], ks]
    # m[:, k] is the sum of delta2 over sorted positions k and later: the
    # values are gathered in reversed sort order, accumulated, and flipped
    # back.
    m = ep.cumsum(delta2[rows[:, ep.newaxis], ks.flip(axis=1)], axis=-1).flip(axis=1)
    # Increments between consecutive sorted thresholds; the first increment
    # is measured from zero.
    dx = f2_sorted[:, 1:] - f2_sorted[:, :-1]
    dx = ep.concatenate((f2_sorted[:, :1], dx), axis=-1)
    # Accumulating m * dx gives, at each sorted threshold, the squared norm
    # of the clipped perturbation when eta ** 2 equals that threshold.
    dy = m * dx
    y = ep.cumsum(dy, axis=-1)
    # True wherever the squared norm has reached the squared target.
    c = y >= eps**2

    # work-around to get first nonzero element in each row: weight the
    # entries with decreasing positive values so that argmax selects the
    # first True (presumably f counts down from the row length to 1)
    f = ep.arange(x, c.shape[-1], 0, -1)
    j = ep.argmax(c.astype(f.dtype) * f, axis=-1)

    # Step back from threshold j by the overshoot divided by the slope m.
    eta2 = f2_sorted[rows, j] - (y[rows, j] - eps**2) / m[rows, j]
    # it can happen that for certain rows even the largest j is not large enough
    # (i.e. c[:, -1] is False), then we will just use it (without any correction) as it's
    # the best we can do (this should also be the only cases where m[j] can be
    # 0 and they are thus not a problem)
    eta2 = ep.where(c[:, -1], eta2, f2_sorted[:, -1])
    eta = ep.sqrt(eta2)
    # Keep the batch axis and add singleton axes so eta broadcasts against x.
    eta = eta.reshape((-1, ) + (1, ) * (x.ndim - 1))

    # Disabled sanity check of the resulting norm:
    # xp = ep.clip(x + eta * delta, a, b)
    # l2 = (xp - x).reshape((N, -1)).square().sum(axis=-1).sqrt()
    return restore_fn(eta)
def __call__(self, references: T, perturbed: T) -> T:
    """Measures how far each perturbed input is from its reference in Lp norm.

    The norm order is taken from ``self.p``.

    Args:
        references: A batch of reference inputs.
        perturbed: A batch of perturbed inputs.

    Returns:
        A 1D tensor with the distances from references to perturbed.
    """
    (origin, moved), restore_type = ep.astensors_(references, perturbed)
    difference = flatten(moved - origin)
    distances = ep.norms.lp(difference, self.p, axis=-1)
    return restore_type(distances)
def test_astensors_tensors_restore(t: Tensor) -> None:
    """Checks that ``ep.astensors_`` converts several tensors and restores them.

    The conversion must yield a tuple of the same length holding equal
    values. The restore function must return a tuple whose element types
    match the inputs when given several tensors, and a non-tuple when given
    a single tensor.
    """
    originals = (t, t + 1, t + 2)

    converted, restore_type = ep.astensors_(*originals)
    assert isinstance(converted, tuple)
    assert len(originals) == len(converted)
    assert all((before == after).all() for before, after in zip(originals, converted))

    shifted = tuple(tensor + 1 for tensor in converted)
    restored = restore_type(*shifted)
    assert isinstance(restored, tuple)
    assert len(restored) == len(shifted)
    assert all(type(out) == type(src) for out, src in zip(restored, originals))

    single = restore_type(shifted[0])
    assert not isinstance(single, tuple)  # type: ignore
def __call__(self, reference: T, perturbed: T) -> T:
    """Computes the Lp distance between reference and perturbed inputs.

    The norm order is taken from ``self.p``.

    Parameters
    ----------
    reference : T
        A batch of reference inputs.
    perturbed : T
        A batch of perturbed inputs.

    Returns
    -------
    T
        A batch of distances as a 1D tensor.

    """
    tensors, restore_type = ep.astensors_(reference, perturbed)
    start, end = tensors
    return restore_type(ep.norms.lp(flatten(end - start), self.p, axis=-1))
def __call__(self, perturbed: T, outputs: T) -> T:
    """Combines the results of ``self.a`` and ``self.b`` with a logical AND.

    Both callables receive the same converted ``perturbed`` and ``outputs``
    tensors, with ``self.a`` called first.

    Args:
        perturbed: The perturbed inputs handed to both callables.
        outputs: The outputs handed to both callables.

    Returns:
        The element-wise conjunction of both results, restored to the type
        of the inputs.
    """
    tensors, restore_type = ep.astensors_(perturbed, outputs)
    first = self.a(*tensors)
    second = self.b(*tensors)
    return restore_type(ep.logical_and(first, second))
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    starting_points: Optional[T] = None,
    **kwargs: Any,
) -> T:
    """Moves starting points as close to the inputs as binarization allows.

    Every element of the result stays on the same side of the binarization
    threshold as the corresponding element of ``starting_points``. Where the
    clean input is already on that side, its value is used; otherwise the
    value closest to the threshold on that side is used.

    Args:
        model: The model; its bounds provide the default threshold.
        inputs: The clean inputs.
        criterion: The criterion that decides whether an input is adversarial.
        early_stop: Accepted but not used by this method.
        starting_points: The adversarials to refine (required).
        **kwargs: Passed to ``raise_if_kwargs`` (presumably rejected if
            non-empty).

    Returns:
        The refined adversarials, with the same type as the inputs.

    Raises:
        ValueError: If ``starting_points`` is missing, the dtype is not a
            supported float type, ``self.included_in`` is invalid, or the
            refined points are not adversarial exactly where the starting
            points are.
    """
    raise_if_kwargs(kwargs)
    if starting_points is None:
        raise ValueError("BinarizationRefinementAttack requires starting_points")
    (o, x), restore_type = ep.astensors_(inputs, starting_points)
    del inputs, starting_points, kwargs

    is_adversarial = get_is_adversarial(get_criterion(criterion), model)

    if self.threshold is not None:
        threshold = self.threshold
    else:
        # Default to the midpoint of the model's bounds.
        min_, max_ = model.bounds
        threshold = (min_ + max_) / 2.0

    assert o.dtype == x.dtype

    # Work in the NumPy scalar type of the data so that nextafter steps by
    # exactly one representable value of that precision.
    nptype = o.reshape(-1)[0].numpy().dtype.type
    if nptype not in [np.float16, np.float32, np.float64]:
        raise ValueError(  # pragma: no cover
            f"expected dtype to be float16, float32 or float64, found '{nptype}'"
        )

    threshold = nptype(threshold)
    offset = nptype(1.0)

    if self.included_in == "lower":
        lower_, upper_ = threshold, np.nextafter(threshold, threshold + offset)
    elif self.included_in == "upper":
        lower_, upper_ = np.nextafter(threshold, threshold - offset), threshold
    else:
        raise ValueError(
            f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
        )

    assert lower_ < upper_

    ones = ep.ones_like(o)
    lower = ones * lower_
    upper = ones * upper_

    clean_low, clean_high = o <= lower, o >= upper
    adv_low, adv_high = x <= lower, x >= upper

    # Start from NaN so that any element matched by no case is detected below.
    p = ep.full_like(o, ep.nan)
    cases = (
        (clean_low, adv_low, o),
        (clean_low, adv_high, upper),
        (clean_high, adv_low, lower),
        (clean_high, adv_high, o),
    )
    for clean_side, adv_side, value in cases:
        p = ep.where(ep.logical_and(clean_side, adv_side), value, p)

    assert not ep.any(ep.isnan(p))

    before = is_adversarial(x)
    after = is_adversarial(p)
    if (before != after).any():
        raise ValueError(
            "The specified threshold does not match what is done by the model."
        )
    return restore_type(p)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    starting_points: Optional[T] = None,
    **kwargs: Any,
) -> T:
    """For models that preprocess their inputs by binarizing them, this
    attack can improve adversarials found by other attacks. It uses
    information about the binarization to map each value either to the
    corresponding value of the clean input or to the nearest value on the
    required side of the threshold.

    The binarization is described by two attributes of the attack:
    ``self.threshold`` is the threshold used by the model's binarization
    and, if None, defaults to the midpoint of ``model.bounds``, i.e.
    ``(min_ + max_) / 2``. ``self.included_in`` is ``"lower"`` or
    ``"upper"`` and states which interval the threshold value itself
    belongs to.

    Parameters
    ----------
    model : Model
        The model under attack.
    inputs : T
        The clean inputs.
    criterion : Union[Criterion, T]
        The criterion that decides whether an input is adversarial.
    early_stop : Optional[float]
        Accepted but not used by this method.
    starting_points : Optional[T]
        The adversarials to refine (required).

    Returns
    -------
    T
        The refined adversarials.

    Raises
    ------
    ValueError
        If ``starting_points`` is None, the dtype is unsupported,
        ``self.included_in`` is invalid, or the refinement changes which
        points are adversarial.
    """
    raise_if_kwargs(kwargs)
    if starting_points is None:
        raise ValueError("BinarizationRefinementAttack requires starting_points")
    (o, x), restore_type = ep.astensors_(inputs, starting_points)
    del inputs, starting_points, kwargs

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    threshold = self.threshold
    if threshold is None:
        min_, max_ = model.bounds
        threshold = (min_ + max_) / 2.0

    assert o.dtype == x.dtype

    nptype = o.reshape(-1)[0].numpy().dtype.type
    if nptype not in [np.float16, np.float32, np.float64]:
        raise ValueError(  # pragma: no cover
            f"expected dtype to be float16, float32 or float64, found '{nptype}'"
        )

    threshold = nptype(threshold)
    offset = nptype(1.0)

    if self.included_in not in ("lower", "upper"):
        raise ValueError(
            f"expected included_in to be 'lower' or 'upper', found '{self.included_in}'"
        )
    if self.included_in == "lower":
        # The threshold belongs to the lower side; the upper side starts at
        # the next representable value above it.
        lower_ = threshold
        upper_ = np.nextafter(threshold, threshold + offset)
    else:
        # The threshold belongs to the upper side; the lower side ends at
        # the next representable value below it.
        lower_ = np.nextafter(threshold, threshold - offset)
        upper_ = threshold

    assert lower_ < upper_

    lower = ep.ones_like(o) * lower_
    upper = ep.ones_like(o) * upper_

    orig_below, orig_above = o <= lower, o >= upper
    adv_below, adv_above = x <= lower, x >= upper

    # NaN marks elements not yet assigned; none may remain afterwards.
    p = ep.full_like(o, ep.nan)
    # Same side as the clean input: reuse the clean value.
    p = ep.where(ep.logical_and(orig_below, adv_below), o, p)
    # Opposite sides: take the value nearest the threshold on the
    # adversarial's side.
    p = ep.where(ep.logical_and(orig_below, adv_above), upper, p)
    p = ep.where(ep.logical_and(orig_above, adv_below), lower, p)
    p = ep.where(ep.logical_and(orig_above, adv_above), o, p)

    assert not ep.any(ep.isnan(p))

    if (is_adversarial(x) != is_adversarial(p)).any():
        raise ValueError(
            "The specified threshold does not match what is done by the model."
        )
    return restore_type(p)