def transform_bounds(self: ModelType, bounds: BoundsInput, inplace: bool = False) -> ModelType:
    """Returns a model with the desired bounds and updates the preprocessing accordingly"""
    # more efficient than the base class implementation because it
    # avoids the additional wrapper
    if self.bounds == bounds:
        if inplace:
            return self
        else:
            return copy.copy(self)

    a, b = self.bounds
    c, d = bounds
    f = (d - c) / (b - a)

    mean, std, flip_axis = self._preprocess_args

    if mean is None:
        mean = ep.zeros(self._dummy, 1)
    mean = f * (mean - a) + c

    if std is None:
        std = ep.ones(self._dummy, 1)
    std = f * std

    if inplace:
        model = self
    else:
        model = copy.copy(self)

    model._bounds = Bounds(*bounds)
    model._preprocess_args = (mean, std, flip_axis)
    return model
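# A minimal numpy sketch (not part of the library; the bounds and the
# preprocessing values below are made up for illustration) checking the
# math used by transform_bounds: with f = (d - c) / (b - a) and
# x' = f * (x - a) + c, normalizing x' with the transformed mean/std
# gives the same result as normalizing x with the original ones.
import numpy as np

a, b = 0.0, 1.0    # old bounds
c, d = 0.0, 255.0  # new bounds
f = (d - c) / (b - a)

x = np.random.uniform(a, b, size=(4,))
mean, std = 0.5, 0.25  # hypothetical preprocessing for the old bounds

mean_new = f * (mean - a) + c
std_new = f * std
x_new = f * (x - a) + c

np.testing.assert_allclose((x - mean) / std, (x_new - mean_new) / std_new)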
def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    del inputs

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    lower_bound = ep.zeros(x, len(x))
    upper_bound = ep.ones(x, len(x))
    epsilons = lower_bound

    for _ in range(self.binary_search_steps):
        eps = atleast_kd(epsilons, x.ndim)
        is_adv = is_adversarial(x + eps * direction)
        lower_bound = ep.where(is_adv, lower_bound, epsilons)
        upper_bound = ep.where(is_adv, epsilons, upper_bound)
        epsilons = (lower_bound + upper_bound) / 2

    epsilons = upper_bound
    eps = atleast_kd(epsilons, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
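# Plain-Python sketch of the invariant behind the binary search above
# (the is_adv oracle and its threshold are hypothetical): lower_bound only
# ever holds non-adversarial blending factors and upper_bound adversarial
# ones, so returning upper_bound yields an adversarial input, assuming
# blending fully into the target (epsilon = 1) is adversarial.
lower, upper = 0.0, 1.0

def is_adv(eps: float) -> bool:  # hypothetical per-input oracle
    return eps > 0.37

for _ in range(10):
    eps = (lower + upper) / 2
    if is_adv(eps):
        upper = eps
    else:
        lower = eps

print(upper)  # slightly above 0.37, the smallest adversarial blending found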
def get_zig_zag_mask(self, frequence_range: Tuple[float, float], mask_shape: Tuple[int, int] = (8, 8)) -> Any:
    total_component = mask_shape[0] * mask_shape[1]
    n_coeff_kept = int(total_component * min(1, frequence_range[1]))
    n_coeff_to_start = int(total_component * max(0, frequence_range[0]))

    imsize = self._originals.shape
    mask_shape = (imsize[0], imsize[1], mask_shape[0], mask_shape[1])
    mask = ep.zeros(self._originals, mask_shape).raw

    s = 0
    while n_coeff_kept > 0:
        # walk the s-th anti-diagonal, alternating direction (zig-zag)
        for i in range(min(s + 1, mask_shape[2])):
            for j in range(min(s + 1, mask_shape[3])):
                if i + j == s:
                    if n_coeff_to_start > 0:
                        n_coeff_to_start -= 1
                        continue
                    if s % 2:
                        mask[:, :, i, j] = 1
                    else:
                        mask[:, :, j, i] = 1
                    n_coeff_kept -= 1
                    if n_coeff_kept == 0:
                        return ep.astensor(mask)
        s += 1
    return ep.astensor(mask)
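# Standalone numpy sketch (assumption: this mirrors the zig-zag traversal
# above) that keeps the first quarter of the coefficients of a 4x4 block
# in JPEG-style zig-zag order, i.e. the low frequencies.
import numpy as np

def zig_zag_order(n: int):
    order = []
    for s in range(2 * n - 1):
        cells = [(i, s - i) for i in range(n) if 0 <= s - i < n]
        if s % 2 == 0:
            cells = [(j, i) for (i, j) in cells]  # reverse direction on even diagonals
        order.extend(cells)
    return order

mask = np.zeros((4, 4))
for (i, j) in zig_zag_order(4)[: 16 // 4]:
    mask[i, j] = 1
print(mask)  # ones in the top-left (low-frequency) corner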
def _get_best_theta(
        self,
        function_evolution: Callable[[ep.Tensor], ep.Tensor],
        best_params: ep.Tensor) -> ep.Tensor:
    v_type = function_evolution(best_params)
    coefficients = ep.zeros(v_type, 2 * self.T).raw
    for i in range(0, self.T):
        coefficients[2 * i] = 1 - (i / self.T)
        coefficients[2 * i + 1] = -coefficients[2 * i]

    for i, coeff in enumerate(coefficients):
        params = coeff * self.theta_max
        x_evol = function_evolution(params)
        x = ep.where(
            atleast_kd(best_params == 0, v_type.ndim),
            x_evol,
            ep.zeros_like(v_type))
        is_advs = self._is_adversarial(x)
        best_params = ep.where(
            (best_params == 0) * is_advs,
            params,
            best_params
        )
        if (best_params != 0).all():
            break
    return best_params
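# Plain-Python sketch of the candidate schedule built above: for T = 3
# the coefficients are tried in the order 1, -1, 2/3, -2/3, 1/3, -1/3,
# i.e. alternating sign with shrinking magnitude, each scaled by theta_max.
T = 3
coefficients = []
for i in range(T):
    coefficients += [1 - i / T, -(1 - i / T)]
print(coefficients)  # [1.0, -1.0, 0.667, -0.667, 0.333, -0.333] (approximately)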
def test_squeeze_not_one(dummy: Tensor, axis: Optional[AxisAxes]) -> None:
    t = ep.zeros(dummy, (3, 4, 5))
    if axis is None:
        t.squeeze(axis=axis)
    else:
        with pytest.raises(Exception):
            # squeezing specific axes should fail if they are not 1
            t.squeeze(axis=axis)
def uniform_l1_n_balls(dummy: ep.Tensor, batch_size: int, n: int) -> ep.Tensor:
    # https://mathoverflow.net/a/9188
    u = ep.uniform(dummy, (batch_size, n))
    v = u.sort(axis=-1)
    vp = ep.concatenate([ep.zeros(v, (batch_size, 1)), v[:, : n - 1]], axis=-1)
    assert v.shape == vp.shape
    x = v - vp
    sign = ep.uniform(dummy, (batch_size, n), low=-1.0, high=1.0).sign()
    return sign * x
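# Quick numpy check (same construction as above, rewritten without
# eagerpy) that the samples indeed lie in the L1 unit ball: the sorted
# uniform spacings sum to max(u) <= 1, and the random signs do not
# change the L1 norm.
import numpy as np

u = np.random.uniform(size=(1000, 5))
v = np.sort(u, axis=-1)
vp = np.concatenate([np.zeros((1000, 1)), v[:, :-1]], axis=-1)
x = v - vp
sign = np.sign(np.random.uniform(-1.0, 1.0, size=x.shape))
samples = sign * x

assert (np.abs(samples).sum(axis=-1) <= 1.0 + 1e-9).all()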
def test_tensorboard(logdir: Union[Literal[False], None, str], tmp_path: Any, dummy: ep.Tensor) -> None:
    if logdir == "temp":
        logdir = tmp_path

    if logdir:
        before = len(list(tmp_path.iterdir()))

    tb = fbn.tensorboard.TensorBoard(logdir)

    tb.scalar("a_scalar", 5, step=1)

    x = ep.ones(dummy, 10)
    tb.mean("a_mean", x, step=2)

    x = ep.ones(dummy, 10) == ep.arange(dummy, 10)
    tb.probability("a_probability", x, step=2)

    x = ep.arange(dummy, 10).float32()
    cond = ep.ones(dummy, 10) == (ep.arange(dummy, 10) % 2)
    tb.conditional_mean("a_conditional_mean", x, cond, step=2)

    x = ep.arange(dummy, 10).float32()
    cond = ep.ones(dummy, 10) == ep.zeros(dummy, 10)
    tb.conditional_mean("a_conditional_mean_false", x, cond, step=2)

    x = ep.ones(dummy, 10) == ep.arange(dummy, 10)
    y = ep.ones(dummy, 10) == (ep.arange(dummy, 10) % 2)
    tb.probability_ratio("a_probability_ratio", x, y, step=5)

    x = ep.ones(dummy, 10) == (ep.arange(dummy, 10) % 2)
    y = ep.ones(dummy, 10) == ep.zeros(dummy, 10)
    tb.probability_ratio("a_probability_ratio_y_zero", x, y, step=5)

    x = ep.arange(dummy, 10).float32()
    tb.histogram("a_histogram", x, step=9, first=False)
    tb.histogram("a_histogram", x, step=10, first=True)

    tb.close()

    if logdir:
        after = len(list(tmp_path.iterdir()))
        assert after > before  # make sure something has been written
def test_plot(dummy: ep.Tensor) -> None:
    # just tests that the calls don't throw any errors
    images = ep.zeros(dummy, (10, 3, 32, 32))
    fbn.plot.images(images)
    fbn.plot.images(images, n=3)
    fbn.plot.images(images, n=3, data_format="channels_first")
    fbn.plot.images(images, nrows=4)
    fbn.plot.images(images, ncols=3)
    fbn.plot.images(images, nrows=2, ncols=6)
    fbn.plot.images(images, nrows=2, ncols=4)

    # test for single channel images
    images = ep.zeros(dummy, (10, 32, 32, 1))
    fbn.plot.images(images)

    with pytest.raises(ValueError):
        images = ep.zeros(dummy, (10, 3, 3, 3))
        fbn.plot.images(images)
    with pytest.raises(ValueError):
        images = ep.zeros(dummy, (10, 1, 1, 1))
        fbn.plot.images(images)
    with pytest.raises(ValueError):
        images = ep.zeros(dummy, (10, 32, 32))
        fbn.plot.images(images)
    with pytest.raises(ValueError):
        images = ep.zeros(dummy, (10, 3, 32, 32))
        fbn.plot.images(images, data_format="foo")
def _get_candidates(self, originals: ep.Tensor, best_advs: ep.Tensor) -> ep.Tensor:
    """Find the lowest epsilon that misclassifies x when following the
    direction: q is of class 1 (adversarial), while q + eps * direction
    is of class 0 (the original class)."""
    epsilons = ep.zeros(originals, len(originals))
    direction_2 = ep.zeros_like(originals)
    while (epsilons == 0).any():
        # if epsilon == 0, we are still searching for a good direction
        direction_2 = ep.where(
            atleast_kd(epsilons == 0, direction_2.ndim),
            self._basis.get_vector(self._directions_ortho),
            direction_2
        )

        for i, eps_i in enumerate(epsilons):
            if eps_i == 0:
                self._directions_ortho[i] = ep.concatenate(
                    (self._directions_ortho[i], direction_2[i].expand_dims(0)), axis=0)
                if len(self._directions_ortho[i]) > self.n_ortho + 1:
                    self._directions_ortho[i] = ep.concatenate(
                        (self._directions_ortho[i][:1], self._directions_ortho[i][self.n_ortho:]))

        function_evolution = self._get_evolution_function(originals, best_advs, direction_2)
        new_epsilons = self._get_best_theta(function_evolution, epsilons)

        self.theta_max = ep.where(new_epsilons == 0, self.theta_max * self.rho, self.theta_max)
        self.theta_max = ep.where(
            (new_epsilons != 0) * (epsilons == 0), self.theta_max / self.rho, self.theta_max)
        epsilons = new_epsilons

    function_evolution = self._get_evolution_function(originals, best_advs, direction_2)
    if self.with_alpha_line_search:
        epsilons = self._binary_search_on_alpha(function_evolution, epsilons)

    epsilons = epsilons.expand_dims(0)
    if self.with_interpolation:
        epsilons = ep.concatenate((epsilons, epsilons[0] / 2), axis=0)

    candidates = ep.concatenate(
        [function_evolution(eps).expand_dims(0) for eps in epsilons], axis=0)

    if self.with_interpolation:
        d = self.distance(best_advs, originals)
        delta = self.distance(self._binary_search(originals, candidates[1], boost=True), originals)
        theta_star = epsilons[0]

        num = theta_star * (4 * delta - d * (self._cos(theta_star.raw) + 3))
        den = 4 * (2 * delta - d * (self._cos(theta_star.raw) + 1))
        theta_hat = num / den

        q_interp = function_evolution(theta_hat)
        if self.with_distance_line_search:
            q_interp = self._binary_search(originals, q_interp, boost=True)
        candidates = ep.concatenate((candidates, q_interp.expand_dims(0)), axis=0)

    return candidates
def __call__(self, inputs, labels, *, epsilon, criterion, repeats=100, check_trivial=True):
    originals = ep.astensor(inputs)
    labels = ep.astensor(labels)

    def is_adversarial(p: ep.Tensor) -> ep.Tensor:
        """For each input in x, returns True if it is adversarial for
        the given model and criterion"""
        logits = self.model.forward(p)
        return criterion(originals, labels, p, logits)

    x0 = ep.astensor(inputs)
    min_, max_ = self.model.bounds()

    result = x0
    if check_trivial:
        found = is_adversarial(result)
    else:
        found = ep.zeros(x0, len(result)).bool()

    for _ in range(repeats):
        if found.all():
            break

        p = self.sample_noise(x0)
        norms = self.get_norms(p)
        p = p / atleast_kd(norms, p.ndim)
        x = x0 + epsilon * p
        x = x.clip(min_, max_)

        is_adv = is_adversarial(x)
        is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
        result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
        found = ep.logical_or(found, is_adv)

    return result.tensor
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, Any] = None,
    *,
    epsilon: float,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    verify_input_bounds(x0, model)

    is_adversarial = get_is_adversarial(criterion_, model)

    min_, max_ = model.bounds

    result = x0
    if self.check_trivial:
        found = is_adversarial(result)
    else:
        found = ep.zeros(x0, len(result)).bool()

    for _ in range(self.repeats):
        if found.all():
            break

        p = self.sample_noise(x0)
        epsilons = self.get_epsilons(x0, p, epsilon, min_=min_, max_=max_)
        x = x0 + epsilons * p
        x = x.clip(min_, max_)

        is_adv = is_adversarial(x)
        is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
        result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
        found = ep.logical_or(found, is_adv)

    return restore_type(result)
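# Hypothetical usage sketch for an attack built on this run() loop; the
# attack class name and setup below are assumptions, not taken from this
# file. net is a trained PyTorch classifier, images/labels a batch.
import foolbox as fb

fmodel = fb.PyTorchModel(net, bounds=(0, 1))
attack = fb.attacks.L2RepeatedAdditiveGaussianNoiseAttack(repeats=100, check_trivial=True)
raw, clipped, success = attack(fmodel, images, labels, epsilons=[1.0])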
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    lower_bound = ep.zeros(x, len(x))
    upper_bound = ep.ones(x, len(x))
    epsilons = lower_bound

    for _ in range(self.binary_search_steps):
        eps = atleast_kd(epsilons, x.ndim)
        is_adv = is_adversarial(x + eps * direction)
        lower_bound = ep.where(is_adv, lower_bound, epsilons)
        upper_bound = ep.where(is_adv, epsilons, upper_bound)
        epsilons = (lower_bound + upper_bound) / 2

    epsilons = upper_bound
    eps = atleast_kd(epsilons, x.ndim)
    xp = x + eps * direction

    return restore_type(xp)
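# Hedged usage sketch: this run() matches a binary-search contrast
# reduction attack that blends x towards the constant target
# min_ + target * (max_ - min_). The class name below is an assumption.
import foolbox as fb

attack = fb.attacks.BinarySearchContrastReductionAttack(binary_search_steps=15, target=0.5)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))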
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Misclassification,
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    is_adversarial = get_is_adversarial(criterion_, model)

    N = len(x0)
    shape = list(x0.shape)
    if self.across_channels and x0.ndim > 2:
        if self.channel_axis is None:
            channel_axis = get_channel_axis(model, x0.ndim)
        else:
            channel_axis = self.channel_axis % x0.ndim
        if channel_axis is not None:
            shape[channel_axis] = 1

    min_, max_ = model.bounds
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))

    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / self.steps
    p = stepsizes

    for step in range(self.steps):
        # add salt and pepper
        u = ep.uniform(x0, tuple(shape))
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).norms.l2(axis=-1)
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability)
        remaining = self.steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes)
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return restore_type(result)
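# Hedged usage sketch (the class name is an assumption based on the
# salt-and-pepper logic above): run the attack and inspect the results.
import foolbox as fb

attack = fb.attacks.SaltAndPepperNoiseAttack(steps=1000, across_channels=True)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))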
def test_flatten_4d(dummy: ep.Tensor) -> None:
    x = ep.zeros(dummy, (4, 5, 6, 7))
    x = fbn.devutils.flatten(x)
    assert x.shape == (4, 210)
def test_repr(t: Tensor) -> None:
    assert not repr(t).startswith("<")
    t = ep.zeros(t, (10, 10))
    assert not repr(t).startswith("<")
    assert len(repr(t).split("\n")) > 1
def test_zeros_tuple(t: Tensor) -> Tensor:
    return ep.zeros(t, (2, 3))
def test_zeros_scalar(t: Tensor) -> Tensor:
    return ep.zeros(t, 5)
def __call__(
    self,
    model: Model,
    inputs,
    labels,
    *,
    criterion=misclassification,
    channel_axis: Optional[int] = None,
):
    """
    Parameters
    ----------
    channel_axis
        The axis across which the noise should be the same (if
        across_channels is True). If None, will be automatically
        inferred from the model if possible.
    """
    inputs, labels, restore = wrap(inputs, labels)
    is_adversarial = get_is_adversarial(criterion, inputs, labels, model)

    x0 = inputs
    N = len(x0)
    shape = list(x0.shape)
    if self.across_channels and x0.ndim > 2:
        if channel_axis is None and not hasattr(model, "data_format"):
            raise ValueError(
                "cannot infer the data_format from the model, please specify"
                " channel_axis when calling the attack")
        elif channel_axis is None:
            data_format = model.data_format  # type: ignore
            if (data_format is None
                    or data_format != "channels_first"
                    and data_format != "channels_last"):
                raise ValueError(
                    "expected data_format to be 'channels_first' or 'channels_last'")
            channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1
        elif not 0 <= channel_axis < x0.ndim:
            raise ValueError(f"expected channel_axis to be in [0, {x0.ndim})")
        shape[channel_axis] = 1

    min_, max_ = model.bounds()
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))

    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / self.steps
    p = stepsizes

    for step in range(self.steps):
        # add salt and pepper
        u = ep.uniform(x0, shape)
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).square().sum(axis=-1).sqrt()
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(
            is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
        )
        remaining = self.steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes
        )
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return restore(result)
def __call__(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
) -> T:
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
        change_classes_logits = self.confidence
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
        change_classes_logits = -self.confidence
    else:
        raise ValueError("unsupported criterion")

    def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
        if change_classes_logits != 0:
            logits += ep.onehot_like(logits, classes, value=change_classes_logits)
        return criterion_(perturbed, logits)

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(f"expected {name} to have shape ({N},), got {classes.shape}")

    min_, max_ = model.bounds

    rows = range(N)

    def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
        assert y_k.shape == x.shape
        assert consts.shape == (N,)

        logits = model(y_k)

        if targeted:
            c_minimize = best_other_classes(logits, classes)
            c_maximize = classes
        else:
            c_minimize = classes
            c_maximize = best_other_classes(logits, classes)

        is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
        assert is_adv_loss.shape == (N,)

        is_adv_loss = is_adv_loss + self.confidence
        is_adv_loss = ep.maximum(0, is_adv_loss)
        is_adv_loss = is_adv_loss * consts

        squared_norms = flatten(y_k - x).square().sum(axis=-1)
        loss = is_adv_loss.sum() + squared_norms.sum()
        return loss, logits

    loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

    consts = self.initial_const * ep.ones(x, (N,))
    lower_bounds = ep.zeros(x, (N,))
    upper_bounds = ep.inf * ep.ones(x, (N,))

    best_advs = ep.zeros_like(x)
    best_advs_norms = ep.ones(x, (N,)) * ep.inf

    # the binary search searches for the smallest consts that produce adversarials
    for binary_search_step in range(self.binary_search_steps):
        if (
            binary_search_step == self.binary_search_steps - 1
            and self.binary_search_steps >= 10
        ):
            # in the last iteration, repeat the search once
            consts = ep.minimum(upper_bounds, 1e10)

        # create a new optimizer to find the delta that minimizes the loss
        x_k = x
        y_k = x

        found_advs = ep.full(x, (N,), value=False).bool()  # found adv with the current consts
        loss_at_previous_check = ep.ones(x, (1,)) * ep.inf

        for iteration in range(self.steps):
            # square-root learning rate decay
            stepsize = self.initial_stepsize * (1.0 - iteration / self.steps) ** 0.5

            loss, logits, gradient = loss_aux_and_grad(y_k, consts)
            x_k_old = x_k
            x_k = project_shrinkage_thresholding(
                y_k - stepsize * gradient, x, self.regularization, min_, max_
            )
            y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old)

            if self.abort_early and iteration % (math.ceil(self.steps / 10)) == 0:
                # after each tenth of the iterations, check progress
                # TODO: loss is a scalar ep tensor. is this the best way to
                # implement the condition?
                if not ep.all(loss <= 0.9999 * loss_at_previous_check):
                    break  # stop optimization if there has been no progress
                loss_at_previous_check = loss

            found_advs_iter = is_adversarial(x_k, logits)

            best_advs, best_advs_norms = apply_decision_rule(
                self.decision_rule,
                self.regularization,
                best_advs,
                best_advs_norms,
                x_k,
                x,
                found_advs_iter,
            )

            found_advs = ep.logical_or(found_advs, found_advs_iter)

        upper_bounds = ep.where(found_advs, consts, upper_bounds)
        lower_bounds = ep.where(found_advs, lower_bounds, consts)

        consts_exponential_search = consts * 10
        consts_binary_search = (lower_bounds + upper_bounds) / 2
        consts = ep.where(
            ep.isinf(upper_bounds), consts_exponential_search, consts_binary_search
        )

    return restore_type(best_advs)
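# A minimal eagerpy sketch (assumption: this mirrors the semantics of
# the project_shrinkage_thresholding helper used above, which is defined
# elsewhere) of the elementwise projected soft-thresholding step of
# EAD/ISTA: components of z within `regularization` of x0 snap back to
# x0; the rest are shrunk towards x0 and clipped to the model bounds.
import eagerpy as ep

def shrinkage_thresholding_sketch(
    z: ep.Tensor, x0: ep.Tensor, regularization: float, min_: float, max_: float
) -> ep.Tensor:
    upper = z - x0 > regularization
    lower = z - x0 < -regularization
    projection = ep.where(upper, ep.minimum(z - regularization, max_), x0)
    projection = ep.where(lower, ep.maximum(z + regularization, min_), projection)
    return projection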
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(f"expected {name} to have shape ({N},), got {classes.shape}")

    stepsize = 1.0
    min_, max_ = model.bounds

    def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
        logits = model(inputs)
        sign = -1.0 if targeted else 1.0
        loss = sign * ep.crossentropy(logits, labels).sum()
        return loss, logits

    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    delta = ep.zeros_like(x)
    epsilon = self.init_epsilon * ep.ones(x, len(x))
    worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

    best_l2 = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of the learning rate, from 1.0 down to 0.01
        stepsize = 0.01 + (stepsize - 0.01) * (1 + math.cos(math.pi * i / self.steps)) / 2

        x_adv = x + delta

        _, logits, gradients = grad_and_logits(x_adv, classes)
        gradients = normalize_gradient_l2_norms(gradients)
        is_adversarial = criterion_(x_adv, logits)

        l2 = ep.norms.l2(flatten(delta), axis=-1)
        is_smaller = l2 <= best_l2

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_l2 = ep.where(is_both, l2, best_l2)

        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # do step
        delta = delta + stepsize * gradients

        epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma, 1.0 + self.gamma)
        epsilon = ep.minimum(epsilon, worst_norm)

        # project to epsilon ball
        delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1), x.ndim)

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta

    return restore_type(x_adv)
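# Plain-Python sketch of the step size schedule above: the cosine factor
# is applied to the *previous* step size, so the decay compounds and the
# value moves smoothly from 1.0 at i = 0 towards 0.01.
import math

steps = 10
stepsize = 1.0
for i in range(steps):
    stepsize = 0.01 + (stepsize - 0.01) * (1 + math.cos(math.pi * i / steps)) / 2
    print(round(stepsize, 4))  # 1.0, 0.9758, ... decreasing towards 0.01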
def run(
    self,
    model: Model,
    inputs: T,
    criterion: TargetedMisclassification,
    *,
    epsilon: float,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    N = len(x)

    if isinstance(criterion, TargetedMisclassification):
        classes = criterion.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        raise ValueError(f"expected target_classes to have shape ({N},), got {classes.shape}")

    noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]]
    channel_axis: Optional[int] = None
    if self.reduced_dims is not None:
        if x.ndim != 4:
            raise NotImplementedError(
                "only implemented for inputs with two spatial dimensions"
                " (and one channel and one batch dimension)")

        if self.channel_axis is None:
            maybe_axis = get_channel_axis(model, x.ndim)
            if maybe_axis is None:
                raise ValueError(
                    "cannot infer the data_format from the model, please"
                    " specify channel_axis when initializing the attack")
            else:
                channel_axis = maybe_axis
        else:
            channel_axis = self.channel_axis % x.ndim

        if channel_axis == 1:
            noise_shape = (x.shape[1], *self.reduced_dims)
        elif channel_axis == 3:
            noise_shape = (*self.reduced_dims, x.shape[3])
        else:
            raise ValueError(f"expected 'channel_axis' to be 1 or 3, got {channel_axis}")
    else:
        noise_shape = x.shape[1:]  # pragma: no cover

    def is_adversarial(logits: ep.TensorType) -> ep.TensorType:
        return ep.argmax(logits, 1) == classes

    num_plateaus = ep.zeros(x, len(x))
    mutation_probability = ep.ones_like(num_plateaus) * self.min_mutation_probability
    mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range

    noise_pops = ep.uniform(x, (N, self.population, *noise_shape), -epsilon, epsilon)

    def calculate_fitness(logits: ep.TensorType) -> ep.TensorType:
        # fitness is the log-odds of the target class:
        # log p(target) - log(1 - p(target))
        first = logits[range(N), classes]
        second = ep.log(ep.exp(logits).sum(1) - ep.exp(first))
        return first - second

    n_its_wo_change = ep.zeros(x, (N,))
    for step in range(self.steps):
        fitness_l, is_adv_l = [], []

        for i in range(self.population):
            it = self.apply_noise(x, noise_pops[:, i], epsilon, channel_axis)
            logits = model(it)
            f = calculate_fitness(logits)
            a = is_adversarial(logits)
            fitness_l.append(f)
            is_adv_l.append(a)

        fitness = ep.stack(fitness_l)
        is_adv = ep.stack(is_adv_l, 1)
        elite_idxs = ep.argmax(fitness, 0)

        elite_noise = noise_pops[range(N), elite_idxs]
        is_adv = is_adv[range(N), elite_idxs]

        # early stopping
        if is_adv.all():
            return restore_type(  # pragma: no cover
                self.apply_noise(x, elite_noise, epsilon, channel_axis))

        probs = ep.softmax(fitness / self.sampling_temperature, 0)
        parents_idxs = np.stack(
            [
                self.choice(
                    self.population,
                    2 * self.population - 2,
                    replace=True,
                    p=probs[:, i],
                )
                for i in range(N)
            ],
            1,
        )

        mutations = [
            ep.uniform(
                x,
                noise_shape,
                -mutation_range[i].item() * epsilon,
                mutation_range[i].item() * epsilon,
            )
            for i in range(N)
        ]

        new_noise_pops = [elite_noise]
        for i in range(0, self.population - 1):
            parents_1 = noise_pops[range(N), parents_idxs[2 * i]]
            parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]]

            # calculate crossover
            p = probs[parents_idxs[2 * i], range(N)] / (
                probs[parents_idxs[2 * i], range(N)]
                + probs[parents_idxs[2 * i + 1], range(N)])
            p = atleast_kd(p, x.ndim)
            p = ep.tile(p, (1, *noise_shape))

            crossover_mask = ep.uniform(p, p.shape, 0, 1) < p
            children = ep.where(crossover_mask, parents_1, parents_2)

            # calculate mutation
            mutation_mask = ep.uniform(children, children.shape)
            mutation_mask = mutation_mask <= atleast_kd(mutation_probability, children.ndim)
            children = ep.where(mutation_mask, children + mutations[i], children)

            # project back to the epsilon range
            children = ep.clip(children, -epsilon, epsilon)

            new_noise_pops.append(children)

        noise_pops = ep.stack(new_noise_pops, 1)

        # increase num_plateaus if fitness does not improve
        # for 100 consecutive steps
        n_its_wo_change = ep.where(elite_idxs == 0, n_its_wo_change + 1,
                                   ep.zeros_like(n_its_wo_change))
        num_plateaus = ep.where(n_its_wo_change >= 100, num_plateaus + 1, num_plateaus)
        n_its_wo_change = ep.where(n_its_wo_change >= 100,
                                   ep.zeros_like(n_its_wo_change), n_its_wo_change)

        mutation_probability = ep.maximum(
            self.min_mutation_probability,
            0.5 * ep.exp(math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
        )
        mutation_range = ep.maximum(
            self.min_mutation_range,
            0.5 * ep.exp(math.log(0.9) * ep.ones_like(num_plateaus) * num_plateaus),
        )

    return restore_type(self.apply_noise(x, elite_noise, epsilon, channel_axis))
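# Numpy sketch verifying that calculate_fitness above equals the
# log-odds of the target class, log p(target) - log(1 - p(target)):
# with S = sum_j exp(l_j), l_c - log(S - exp(l_c)) = log p_c - log(1 - p_c).
import numpy as np

logits = np.random.randn(2, 10)
classes = np.array([3, 7])
first = logits[range(2), classes]
fitness = first - np.log(np.exp(logits).sum(1) - np.exp(first))

p = np.exp(logits) / np.exp(logits).sum(1, keepdims=True)
pt = p[range(2), classes]
np.testing.assert_allclose(fitness, np.log(pt) - np.log(1 - pt))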
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    starting_points: Optional[ep.Tensor] = None,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    criterion_ = get_criterion(criterion)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    def loss_fn(
        inputs: ep.Tensor, labels: ep.Tensor
    ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        logits = model(inputs)

        if targeted:
            c_minimize = best_other_classes(logits, labels)
            c_maximize = labels  # target_classes
        else:
            c_minimize = labels  # labels
            c_maximize = best_other_classes(logits, labels)

        loss = logits[rows, c_minimize] - logits[rows, c_maximize]

        return -loss.sum(), (logits, loss)

    x, restore_type = ep.astensor_(inputs)
    del inputs, criterion, kwargs

    N = len(x)

    # start from initialization points/attack
    if starting_points is not None:
        x1 = starting_points
    else:
        if self.init_attack is not None:
            x1 = self.init_attack.run(model, x, criterion_)
        else:
            x1 = None

    # if initial points or an initialization attack are provided,
    # search for the boundary
    if x1 is not None:
        is_adv = get_is_adversarial(criterion_, model)
        assert is_adv(x1).all()
        lower_bound = ep.zeros(x, shape=(N,))
        upper_bound = ep.ones(x, shape=(N,))
        for _ in range(self.binary_search_steps):
            epsilons = (lower_bound + upper_bound) / 2
            mid_points = self.mid_points(x, x1, epsilons, model.bounds)
            is_advs = is_adv(mid_points)
            lower_bound = ep.where(is_advs, lower_bound, epsilons)
            upper_bound = ep.where(is_advs, epsilons, upper_bound)
        starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
        delta = starting_points - x
    else:
        # start from x0
        delta = ep.zeros_like(x)

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(f"expected {name} to have shape ({N},), got {classes.shape}")

    min_, max_ = model.bounds
    rows = range(N)
    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    if self.p != 0:
        epsilon = ep.inf * ep.ones(x, len(x))
    else:
        epsilon = (ep.ones(x, len(x)) if x1 is None
                   else ep.norms.l0(flatten(delta), axis=-1))

    if self.p != 0:
        worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)), p=self.p, axis=-1)
    else:
        worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

    best_lp = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of the learning rates
        stepsize = (self.min_stepsize
                    + (self.max_stepsize - self.min_stepsize)
                    * (1 + math.cos(math.pi * i / self.steps)) / 2)
        gamma = (0.001
                 + (self.gamma - 0.001) * (1 + math.cos(math.pi * (i / self.steps))) / 2)

        x_adv = x + delta

        loss, (logits, loss_batch), gradients = grad_and_logits(x_adv, classes)
        is_adversarial = criterion_(x_adv, logits)

        lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
        is_smaller = lp <= best_lp

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_lp = ep.where(is_both, lp, best_lp)
        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # update epsilon
        if self.p != 0:
            distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                flatten(gradients), p=self.dual, axis=-1)
            epsilon = ep.where(
                is_adversarial,
                ep.minimum(
                    epsilon * (1 - gamma),
                    ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                ep.where(
                    adv_found,
                    epsilon * (1 + gamma),
                    ep.norms.lp(flatten(delta), p=self.p, axis=-1) + distance_to_boundary))
        else:
            epsilon = ep.where(
                is_adversarial,
                ep.minimum(
                    ep.minimum(
                        epsilon - 1,
                        (epsilon * (1 - gamma)).astype(int).astype(epsilon.dtype)),
                    ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                ep.maximum(
                    epsilon + 1,
                    (epsilon * (1 + gamma)).astype(int).astype(epsilon.dtype)))
            epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

        # clip epsilon
        epsilon = ep.minimum(epsilon, worst_norm)

        # compute the normalized gradient update
        grad_ = self.normalize(gradients, x=x, bounds=model.bounds) * stepsize

        # do step
        delta = delta + grad_

        # project according to the given norm
        delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta

    return restore_type(x_adv)
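# Hedged usage sketch for this minimum-norm run() loop; the L2 variant
# class name below is an assumption based on the Fast Minimum Norm
# attack family.
import foolbox as fb

attack = fb.attacks.L2FMNAttack(steps=100)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))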
def test_atleast_kd_1d(dummy: ep.Tensor, k: int) -> None:
    x = ep.zeros(dummy, (10,))
    x = fbn.devutils.atleast_kd(x, k)
    assert x.shape[0] == 10
    assert x.ndim == k
def test_atleast_kd_3d(dummy: ep.Tensor, k: int) -> None:
    x = ep.zeros(dummy, (10, 5, 3))
    x = fbn.devutils.atleast_kd(x, k)
    assert x.shape[:3] == (10, 5, 3)
    assert x.ndim == max(k, 3)
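# A minimal sketch (assuming the semantics the tests above rely on) of
# atleast_kd: append trailing singleton axes until x has at least k dims,
# leaving tensors that already have k or more dimensions unchanged.
import eagerpy as ep

def atleast_kd_sketch(x: ep.Tensor, k: int) -> ep.Tensor:
    shape = x.shape + (1,) * (k - x.ndim)
    return x.reshape(shape)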
def __call__(self, inputs, labels, *, criterion, steps=1000):
    originals = ep.astensor(inputs)
    labels = ep.astensor(labels)

    def is_adversarial(p: ep.Tensor) -> ep.Tensor:
        """For each input in x, returns True if it is adversarial for
        the given model and criterion"""
        logits = ep.astensor(self.model.forward(p.tensor))
        return criterion(originals, labels, p, logits)

    x0 = ep.astensor(inputs)
    N = len(x0)
    shape = list(x0.shape)
    if self.channel_axis is not None:
        shape[self.channel_axis] = 1

    min_, max_ = self.model.bounds()
    r = max_ - min_

    result = x0
    is_adv = is_adversarial(result)
    best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))

    min_probability = ep.zeros(x0, N)
    max_probability = ep.ones(x0, N)
    stepsizes = max_probability / steps
    p = stepsizes

    for step in range(steps):
        # add salt and pepper
        u = ep.uniform(x0, shape)
        p_ = atleast_kd(p, x0.ndim)
        salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
        pepper = -(u < p_ / 2).astype(x0.dtype) * r
        x = x0 + salt + pepper
        x = ep.clip(x, min_, max_)

        # check if we found new best adversarials
        norms = flatten(x).square().sum(axis=-1).sqrt()
        closer = norms < best_advs_norms
        is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
        is_best_adv = ep.logical_and(is_adv, closer)

        # update results and search space
        result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
        best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
        min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
        # we set max_probability a bit higher than p because the relationship
        # between p and norms is not strictly monotonic
        max_probability = ep.where(
            is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
        )
        remaining = steps - step
        stepsizes = ep.where(
            is_best_adv, (max_probability - min_probability) / remaining, stepsizes
        )
        reset = p == max_probability
        p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
        p = ep.minimum(p + stepsizes, max_probability)

    return result.tensor