def _get_best_theta(
    self,
    function_evolution: Callable[[ep.Tensor], ep.Tensor],
    best_params: ep.Tensor,
) -> ep.Tensor:
    v_type = function_evolution(best_params)
    coefficients = ep.zeros(v_type, 2 * self.T).raw
    for i in range(0, self.T):
        coefficients[2 * i] = 1 - (i / self.T)
        coefficients[2 * i + 1] = -coefficients[2 * i]

    for i, coeff in enumerate(coefficients):
        params = coeff * self.theta_max
        x_evol = function_evolution(params)
        x = ep.where(
            atleast_kd(best_params == 0, v_type.ndim), x_evol, ep.zeros_like(v_type)
        )
        is_advs = self._is_adversarial(x)
        best_params = ep.where(
            (best_params == 0) * is_advs,
            params,
            best_params,
        )
        if (best_params != 0).all():
            break
    return best_params
def _binary_search(
    self, originals: ep.Tensor, perturbed: ep.Tensor, boost: Optional[bool] = False
) -> ep.Tensor:
    # Choose the upper thresholds of the binary search based on the constraint.
    highs = ep.ones(perturbed, len(perturbed))
    d = np.prod(perturbed.shape[1:])
    thresholds = self._BS_gamma / (d * math.sqrt(d))
    lows = ep.zeros_like(highs)

    # Boosted binary search: start from a blend shifted towards the adversarial.
    if boost:
        boost_vec = 0.1 * originals + 0.9 * perturbed
        is_advs = self._is_adversarial(boost_vec)
        is_advs = atleast_kd(is_advs, originals.ndim)
        originals = ep.where(is_advs.logical_not(), boost_vec, originals)
        perturbed = ep.where(is_advs, boost_vec, perturbed)

    # use this variable to check when mids stays constant and the BS has converged
    old_mids = highs
    iteration = 0
    while ep.any(highs - lows > thresholds) and iteration < self._BS_max_iteration:
        iteration += 1
        mids = (lows + highs) / 2
        mids_perturbed = self._project(originals, perturbed, mids)
        is_adversarial_ = self._is_adversarial(mids_perturbed)

        highs = ep.where(is_adversarial_, mids, highs)
        lows = ep.where(is_adversarial_, lows, mids)

        # check if there is no more progress due to numerical imprecision
        reached_numerical_precision = (old_mids == mids).all()
        old_mids = mids
        if reached_numerical_precision:
            break

    results = self._project(originals, perturbed, highs)
    return results
def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    del inputs
    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    lower_bound = ep.zeros(x, len(x))
    upper_bound = ep.ones(x, len(x))
    epsilons = lower_bound
    for _ in range(self.binary_search_steps):
        eps = atleast_kd(epsilons, x.ndim)
        is_adv = is_adversarial(x + eps * direction)
        lower_bound = ep.where(is_adv, lower_bound, epsilons)
        upper_bound = ep.where(is_adv, epsilons, upper_bound)
        epsilons = (lower_bound + upper_bound) / 2

    epsilons = upper_bound
    eps = atleast_kd(epsilons, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
def l2_clipping_aware_rescaling(
    x, delta, eps: float, a: float = 0.0, b: float = 1.0
):  # type: ignore
    """Calculates eta such that norm(clip(x + eta * delta, a, b) - x) == eps.

    Assumes x and delta have a batch dimension and eps, a, and b are scalars.
    If the equation cannot be solved because eps is too large, the left
    hand side is maximized.

    Args:
        x: A batch of inputs (PyTorch Tensor, TensorFlow Eager Tensor, NumPy
            Array, JAX Array, or EagerPy Tensor).
        delta: A batch of perturbation directions (same shape and type as x).
        eps: The target norm (non-negative float).
        a: The lower bound of the data domain (float).
        b: The upper bound of the data domain (float).

    Returns:
        eta: A batch of scales with the same number of dimensions as x but all
            axes == 1 except for the batch dimension.
    """
    (x, delta), restore_fn = ep.astensors_(x, delta)
    N = x.shape[0]
    assert delta.shape[0] == N
    rows = ep.arange(x, N)

    delta2 = delta.square().reshape((N, -1))
    space = ep.where(delta >= 0, b - x, x - a).reshape((N, -1))
    f2 = space.square() / ep.maximum(delta2, 1e-20)
    ks = ep.argsort(f2, axis=-1)
    f2_sorted = f2[rows[:, ep.newaxis], ks]
    m = ep.cumsum(delta2[rows[:, ep.newaxis], ks.flip(axis=1)], axis=-1).flip(axis=1)
    dx = f2_sorted[:, 1:] - f2_sorted[:, :-1]
    dx = ep.concatenate((f2_sorted[:, :1], dx), axis=-1)
    dy = m * dx
    y = ep.cumsum(dy, axis=-1)
    c = y >= eps**2

    # work-around to get the first nonzero element in each row
    f = ep.arange(x, c.shape[-1], 0, -1)
    j = ep.argmax(c.astype(f.dtype) * f, axis=-1)

    eta2 = f2_sorted[rows, j] - (y[rows, j] - eps**2) / m[rows, j]
    # it can happen that for certain rows even the largest j is not large enough
    # (i.e. c[:, -1] is False), then we will just use it (without any correction)
    # as it's the best we can do (these should also be the only cases where m[j]
    # can be 0 and they are thus not a problem)
    eta2 = ep.where(c[:, -1], eta2, f2_sorted[:, -1])
    eta = ep.sqrt(eta2)
    eta = eta.reshape((-1,) + (1,) * (x.ndim - 1))

    # xp = ep.clip(x + eta * delta, a, b)
    # l2 = (xp - x).reshape((N, -1)).square().sum(axis=-1).sqrt()
    return restore_fn(eta)
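# A quick way to sanity-check l2_clipping_aware_rescaling is to run it on the
# NumPy backend (which the docstring says is supported) and verify the norms
# of the clipped perturbations. The shapes and eps below are illustrative
# choices, not part of the original code.
import numpy as np

x = np.random.uniform(0.0, 1.0, size=(4, 3, 8, 8)).astype(np.float32)
delta = np.random.normal(size=x.shape).astype(np.float32)
eps = 0.5

eta = l2_clipping_aware_rescaling(x, delta, eps)  # shape (4, 1, 1, 1)
xp = np.clip(x + eta * delta, 0.0, 1.0)
norms = np.sqrt(((xp - x).reshape(4, -1) ** 2).sum(axis=-1))
print(norms)  # each entry should be ~0.5 unless eps is unreachable for that row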
def _get_candidates(self, originals: ep.Tensor, best_advs: ep.Tensor) -> ep.Tensor:
    """
    Find the lowest epsilon to misclassify x following the direction:
    q of class 1 / q + eps * direction of class 0
    """
    epsilons = ep.zeros(originals, len(originals))
    direction_2 = ep.zeros_like(originals)
    while (epsilons == 0).any():
        # if epsilon == 0, we are still searching for a good direction
        direction_2 = ep.where(
            atleast_kd(epsilons == 0, direction_2.ndim),
            self._basis.get_vector(self._directions_ortho),
            direction_2,
        )

        for i, eps_i in enumerate(epsilons):
            if eps_i == 0:
                self._directions_ortho[i] = ep.concatenate(
                    (self._directions_ortho[i], direction_2[i].expand_dims(0)), axis=0
                )
                if len(self._directions_ortho[i]) > self.n_ortho + 1:
                    self._directions_ortho[i] = ep.concatenate(
                        (
                            self._directions_ortho[i][:1],
                            self._directions_ortho[i][self.n_ortho:],
                        )
                    )

        function_evolution = self._get_evolution_function(originals, best_advs, direction_2)
        new_epsilons = self._get_best_theta(function_evolution, epsilons)

        self.theta_max = ep.where(
            new_epsilons == 0, self.theta_max * self.rho, self.theta_max
        )
        self.theta_max = ep.where(
            (new_epsilons != 0) * (epsilons == 0), self.theta_max / self.rho, self.theta_max
        )
        epsilons = new_epsilons

    function_evolution = self._get_evolution_function(originals, best_advs, direction_2)
    if self.with_alpha_line_search:
        epsilons = self._binary_search_on_alpha(function_evolution, epsilons)

    epsilons = epsilons.expand_dims(0)
    if self.with_interpolation:
        epsilons = ep.concatenate((epsilons, epsilons[0] / 2), axis=0)

    candidates = ep.concatenate(
        [function_evolution(eps).expand_dims(0) for eps in epsilons], axis=0
    )

    if self.with_interpolation:
        d = self.distance(best_advs, originals)
        delta = self.distance(
            self._binary_search(originals, candidates[1], boost=True), originals
        )
        theta_star = epsilons[0]

        num = theta_star * (4 * delta - d * (self._cos(theta_star.raw) + 3))
        den = 4 * (2 * delta - d * (self._cos(theta_star.raw) + 1))
        theta_hat = num / den
        q_interp = function_evolution(theta_hat)
        if self.with_distance_line_search:
            q_interp = self._binary_search(originals, q_interp, boost=True)
        candidates = ep.concatenate((candidates, q_interp.expand_dims(0)), axis=0)

    return candidates
def _project_shrinkage_thresholding(
    z: ep.Tensor, x0: ep.Tensor, regularization: float, min_: float, max_: float
) -> ep.Tensor:
    """Performs the element-wise projected shrinkage-thresholding operation."""
    upper_mask = z - x0 > regularization
    lower_mask = z - x0 < -regularization

    projection = ep.where(upper_mask, ep.minimum(z - regularization, max_), x0)
    projection = ep.where(lower_mask, ep.maximum(z + regularization, min_), projection)
    return projection
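# Illustrative call on the NumPy backend: entries whose deviation from x0
# exceeds the regularization threshold are shifted back by that threshold and
# clipped to the bounds; all other entries snap back to x0. The values below
# are made up for illustration.
import numpy as np
import eagerpy as ep

x0 = ep.astensor(np.zeros(5, dtype=np.float32))
z = ep.astensor(np.array([-0.5, -0.05, 0.0, 0.05, 0.5], dtype=np.float32))
out = _project_shrinkage_thresholding(z, x0, regularization=0.1, min_=0.0, max_=1.0)
print(out.numpy())  # [0. 0. 0. 0. 0.4]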
def approximate_gradients(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    x_advs: ep.Tensor,
    steps: int,
    delta: ep.Tensor,
) -> ep.Tensor:
    # (steps, bs, ...)
    noise_shape = tuple([steps] + list(x_advs.shape))
    if self.constraint == "l2":
        rv = ep.normal(x_advs, noise_shape)
    elif self.constraint == "linf":
        rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
    rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

    scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv
    perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
    perturbed = ep.clip(perturbed, 0, 1)

    rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0), rv.ndim)

    multipliers_list: List[ep.Tensor] = []
    for step in range(steps):
        decision = is_adversarial(perturbed[step])
        multipliers_list.append(
            ep.where(
                decision,
                ep.ones(x_advs, (len(x_advs),)),
                -ep.ones(x_advs, (len(decision),)),
            )
        )
    # (steps, bs, ...)
    multipliers = ep.stack(multipliers_list, 0)

    vals = ep.where(
        ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
        multipliers,
        multipliers - ep.mean(multipliers, axis=0, keepdims=True),
    )
    grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

    grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

    return grad
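# The estimator above is a Monte-Carlo gradient estimate at the decision
# boundary: sample random directions, score each +1/-1 depending on the
# decision oracle, subtract the mean score as a baseline, and average. A toy
# NumPy sketch against a known linear boundary (it omits the clipping
# correction above; all names and values are illustrative):
import numpy as np

rng = np.random.default_rng(0)
w = np.array([1.0, -2.0, 0.5])                       # "true" boundary normal
x = np.zeros(3)                                      # point on the boundary w @ x = 0
u = rng.normal(size=(10000, 3))
u /= np.linalg.norm(u, axis=1, keepdims=True)        # random unit directions
phi = np.where((x + 1e-3 * u) @ w > 0, 1.0, -1.0)    # decision oracle
vals = phi - phi.mean()                              # baseline-corrected scores
grad = (vals[:, None] * u).mean(axis=0)
grad /= np.linalg.norm(grad)
print(grad, w / np.linalg.norm(w))                   # approximately aligned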
def normalize(
    self, gradients: ep.Tensor, *, x: ep.Tensor, bounds: Bounds
) -> ep.Tensor:
    bad_pos = ep.logical_or(
        ep.logical_and(x == bounds.lower, gradients < 0),
        ep.logical_and(x == bounds.upper, gradients > 0),
    )
    gradients = ep.where(bad_pos, ep.zeros_like(gradients), gradients)

    abs_gradients = gradients.abs()
    quantiles = np.quantile(flatten(abs_gradients).numpy(), q=self.quantile, axis=-1)
    keep = abs_gradients >= atleast_kd(
        ep.from_numpy(gradients, quantiles), gradients.ndim
    )
    e = ep.where(keep, gradients.sign(), ep.zeros_like(gradients))
    return normalize_lp_norms(e, p=1)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    starting_points: Optional[ep.Tensor] = None,
    **kwargs: Any,
) -> T:
    originals, restore_type = ep.astensor_(inputs)

    self._nqueries = {i: 0 for i in range(len(originals))}
    self._set_cos_sin_function(originals)
    self.theta_max = ep.ones(originals, len(originals)) * self._theta_max

    criterion = get_criterion(criterion)
    self._criterion_is_adversarial = get_is_adversarial(criterion, model)

    # Get starting point
    if starting_points is not None:
        best_advs = starting_points
    else:
        init_attack: MinimizationAttack = LinearSearchBlendedUniformNoiseAttack(steps=50)
        best_advs = init_attack.run(model, originals, criterion, early_stop=early_stop)

    assert self._is_adversarial(best_advs).all()
    # Initialize the direction orthogonalized with the first direction
    fd = best_advs - originals
    norm = ep.norms.l2(fd.flatten(1), axis=1)
    fd = fd / atleast_kd(norm, fd.ndim)
    self._directions_ortho = {i: v.expand_dims(0) for i, v in enumerate(fd)}

    # Load basis
    if "basis_params" in kwargs:
        self._basis = Basis(originals, **kwargs["basis_params"])
    else:
        self._basis = Basis(originals)

    for _ in range(self._steps):
        # Get candidates. Shape: (n_candidates, batch_size, image_size)
        candidates = self._get_candidates(originals, best_advs)
        candidates = candidates.transpose((1, 0, 2, 3, 4))

        best_candidates = ep.zeros_like(best_advs).raw
        for i, o in enumerate(originals):
            o_repeated = ep.concatenate([o.expand_dims(0)] * len(candidates[i]), axis=0)
            index = ep.argmax(self.distance(o_repeated, candidates[i])).raw
            best_candidates[i] = candidates[i][index].raw

        is_success = self.distance(best_candidates, originals) < self.distance(
            best_advs, originals
        )
        best_advs = ep.where(
            atleast_kd(is_success, best_candidates.ndim),
            ep.astensor(best_candidates),
            best_advs,
        )

        if all(v > self._max_queries for v in self._nqueries.values()):
            print("Max queries attained for all the images.")
            break
    return restore_type(best_advs)
def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    del inputs
    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    best = ep.ones(x, len(x))

    epsilon = 0.0
    stepsize = 1.0 / self.steps
    for _ in range(self.steps):
        # TODO: reduce the batch size to the ones that have not yet been successful
        is_adv = is_adversarial(x + epsilon * direction)
        is_best_adv = ep.logical_and(is_adv, best == 1)
        best = ep.where(is_best_adv, epsilon, best)

        if (best < 1).all():
            break
        epsilon += stepsize

    eps = atleast_kd(best, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
def mid_points(
    self,
    x0: ep.Tensor,
    x1: ep.Tensor,
    epsilons: ep.Tensor,
    bounds: Tuple[float, float],
):
    # returns a point between x0 and x1 where
    # epsilon = 0 returns x0 and epsilon = 1 returns x1
    delta = x1 - x0
    min_, max_ = bounds
    s = max_ - min_
    # get epsilons in the right shape for broadcasting
    epsilons = epsilons.reshape(epsilons.shape + (1,) * (x0.ndim - 1))

    clipped_delta = ep.where(delta < -epsilons * s, -epsilons * s, delta)
    clipped_delta = ep.where(clipped_delta > epsilons * s, epsilons * s, clipped_delta)
    return x0 + clipped_delta
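# The two ep.where calls above implement a plain Linf clip of x1 - x0 to
# radius epsilon * (max_ - min_). Equivalent NumPy sketch (values are
# illustrative only):
import numpy as np

x0 = np.zeros((1, 3), dtype=np.float32)
x1 = np.array([[0.2, -0.8, 1.0]], dtype=np.float32)
eps, s = 0.5, 1.0  # bounds (0, 1)

clipped = np.clip(x1 - x0, -eps * s, eps * s)
print(x0 + clipped)  # [[ 0.2 -0.5  0.5]]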
def __call__(self, inputs, labels, *, steps=1000):
    x = ep.astensor(inputs)
    y = ep.astensor(labels)
    assert x.shape[0] == y.shape[0]
    assert y.ndim == 1

    assert x.ndim == 4
    if self.channel_axis == 1:
        h, w = x.shape[2:4]
    elif self.channel_axis == 3:
        h, w = x.shape[1:3]
    else:
        raise ValueError(
            f"expected 'channel_axis' to be 1 or 3, got {self.channel_axis}"
        )

    size = max(h, w)

    min_, max_ = self.model.bounds()

    x0 = x
    x0np = x0.numpy()
    epsilons = np.linspace(0, 1, num=steps + 1)[1:]

    logits = ep.astensor(self.model.forward(x0.tensor))
    classes = logits.argmax(axis=-1)
    is_adv = classes != labels
    found = is_adv

    result = x0

    for epsilon in epsilons:
        # TODO: reduce the batch size to the ones that haven't been successful
        sigmas = [epsilon * size] * 4
        sigmas[0] = 0
        sigmas[self.channel_axis] = 0

        # TODO: once we can implement gaussian_filter in eagerpy,
        # avoid converting from numpy
        x = gaussian_filter(x0np, sigmas)
        x = np.clip(x, min_, max_)
        x = ep.from_numpy(x0, x)

        logits = ep.astensor(self.model.forward(x.tensor))
        classes = logits.argmax(axis=-1)
        is_adv = classes != labels

        new_adv = ep.logical_and(is_adv, found.logical_not())
        result = ep.where(atleast_kd(new_adv, x.ndim), x, result)
        found = ep.logical_or(new_adv, found)

        if found.all():
            break

    return result.tensor
def _binary_search(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    originals: ep.Tensor,
    perturbed: ep.Tensor,
) -> ep.Tensor:
    # Choose upper thresholds in binary search based on constraint.
    d = np.prod(perturbed.shape[1:])
    if self.constraint == "linf":
        highs = linf(originals, perturbed)
        # TODO: Check if the threshold is correct
        #  empirically this seems to be too low
        thresholds = highs * self.gamma / (d * d)
    else:
        highs = ep.ones(perturbed, len(perturbed))
        thresholds = self.gamma / (d * math.sqrt(d))

    lows = ep.zeros_like(highs)

    # use this variable to check when mids stays constant and the BS has converged
    old_mids = highs

    while ep.any(highs - lows > thresholds):
        mids = (lows + highs) / 2
        mids_perturbed = self._project(originals, perturbed, mids)
        is_adversarial_ = is_adversarial(mids_perturbed)

        highs = ep.where(is_adversarial_, mids, highs)
        lows = ep.where(is_adversarial_, lows, mids)

        # check if there is no more progress due to numerical imprecision
        reached_numerical_precision = (old_mids == mids).all()
        old_mids = mids

        if reached_numerical_precision:
            # TODO: warn user
            break

    res = self._project(originals, perturbed, highs)
    return res
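# The same search in one dimension, with a dummy oracle that calls everything
# above 0.6 "adversarial" and linear interpolation as the projection (all
# names and values here are illustrative):
import numpy as np

original, perturbed = 0.0, 1.0
is_adv = lambda v: v > 0.6
low, high = 0.0, 1.0
while high - low > 1e-6:
    mid = (low + high) / 2
    if is_adv((1 - mid) * original + mid * perturbed):
        high = mid  # mid is adversarial: the boundary is at or below mid
    else:
        low = mid   # mid is benign: the boundary is above mid
print(high)  # ~0.6, the smallest blend that is still adversarial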
def project(self, x: ep.Tensor, x0: ep.Tensor, epsilon: ep.Tensor) -> ep.Tensor:
    flatten_delta = flatten(x - x0)
    abs_delta = abs(flatten_delta)
    epsilon = epsilon.astype(int)
    rows = range(flatten_delta.shape[0])
    idx_sorted = ep.argsort(abs_delta, axis=-1)[rows, -epsilon]
    thresholds = (ep.ones_like(flatten_delta).T * abs_delta[rows, idx_sorted]).T
    clipped = ep.where(abs_delta >= thresholds, flatten_delta, 0)
    return x0 + clipped.reshape(x0.shape).astype(x0.dtype)
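# Equivalent NumPy sketch of this L0 projection: keep only the epsilon
# largest-magnitude entries of the perturbation per sample (the helper name
# and values are illustrative):
import numpy as np

def l0_project(x, x0, epsilon):
    delta = (x - x0).reshape(len(x), -1)
    abs_delta = np.abs(delta)
    # value of the epsilon-th largest |delta| per row
    thresholds = np.sort(abs_delta, axis=-1)[:, -epsilon][:, None]
    kept = np.where(abs_delta >= thresholds, delta, 0.0)
    return x0 + kept.reshape(x0.shape)

x0 = np.zeros((1, 4))
x = np.array([[0.1, -0.5, 0.3, 0.05]])
print(l0_project(x, x0, epsilon=2))  # [[ 0.  -0.5  0.3  0. ]]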
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    found = is_adversarial(x)
    results = x

    def grid_search_generator() -> Generator[Any, Any, Any]:
        dphis = np.linspace(-self.max_rot, self.max_rot, self.num_rots)
        dxs = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
        dys = np.linspace(-self.max_trans, self.max_trans, self.num_trans)
        for dphi in dphis:
            for dx in dxs:
                for dy in dys:
                    yield dphi, dx, dy

    def random_search_generator() -> Generator[Any, Any, Any]:
        dphis = np.random.uniform(-self.max_rot, self.max_rot, self.random_steps)
        dxs = np.random.uniform(-self.max_trans, self.max_trans, self.random_steps)
        dys = np.random.uniform(-self.max_trans, self.max_trans, self.random_steps)
        for dphi, dx, dy in zip(dphis, dxs, dys):
            yield dphi, dx, dy

    gen = grid_search_generator() if self.grid_search else random_search_generator()
    for dphi, dx, dy in gen:
        # TODO: reduce the batch size to the ones that haven't been successful
        x_p = rotate_and_shift(x, translation=(dx, dy), rotation=dphi)
        is_adv = is_adversarial(x_p)
        new_adv = ep.logical_and(is_adv, found.logical_not())

        results = ep.where(atleast_kd(new_adv, x_p.ndim), x_p, results)
        found = ep.logical_or(new_adv, found)

        if found.all():
            break  # all images in the batch are misclassified

    return restore_type(results)
def _binary_search(
    self,
    x_adv_flat: ep.Tensor,
    mask: Union[ep.Tensor, List[bool]],
    mask_indices: ep.Tensor,
    indices: Union[ep.Tensor, List[int]],
    adv_values: ep.Tensor,
    non_adv_values: ep.Tensor,
    original_shape: Tuple,
    is_adversarial: Callable,
) -> ep.Tensor:
    for _ in range(10):
        next_values = (adv_values + non_adv_values) / 2
        x_adv_flat = ep.index_update(
            x_adv_flat, (mask_indices, indices), next_values
        )
        is_adv = is_adversarial(x_adv_flat.reshape(original_shape))[mask]

        adv_values = ep.where(is_adv, next_values, adv_values)
        non_adv_values = ep.where(is_adv, non_adv_values, next_values)

    return adv_values
def normalize_gradient_l2_norms(grad: ep.Tensor) -> ep.Tensor:
    norms = ep.norms.l2(flatten(grad), -1)

    # remove zero gradients
    grad = ep.where(
        atleast_kd(norms == 0, grad.ndim), ep.normal(grad, shape=grad.shape), grad
    )
    # calculate norms again for previously vanishing elements
    norms = ep.norms.l2(flatten(grad), -1)

    norms = ep.maximum(norms, 1e-12)  # avoid division by zero
    factor = 1 / norms
    factor = atleast_kd(factor, grad.ndim)
    return grad * factor
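# Quick check on the NumPy backend, assuming the same flatten helper used
# above is in scope: the zero-gradient sample is replaced by random noise,
# and both samples come out with (approximately) unit L2 norm.
import numpy as np
import eagerpy as ep

grad = ep.astensor(
    np.stack([np.zeros((3, 4)), np.ones((3, 4))]).astype(np.float32)
)
out = normalize_gradient_l2_norms(grad)
print(ep.norms.l2(flatten(out), -1).numpy())  # ~[1. 1.]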
def _apply_decision_rule(
    decision_rule: Union[Literal["EN"], Literal["L1"]],
    beta: float,
    best_advs: ep.Tensor,
    best_advs_norms: ep.Tensor,
    x_k: ep.Tensor,
    x: ep.Tensor,
    found_advs: ep.Tensor,
) -> Tuple[ep.Tensor, ep.Tensor]:
    if decision_rule == "EN":
        norms = beta * flatten(x_k - x).abs().sum(axis=-1) + flatten(
            x_k - x
        ).square().sum(axis=-1)
    else:  # decision rule = L1
        norms = flatten(x_k - x).abs().sum(axis=-1)

    new_best = ep.logical_and(norms < best_advs_norms, found_advs)
    new_best_kd = atleast_kd(new_best, best_advs.ndim)
    best_advs = ep.where(new_best_kd, x_k, best_advs)
    best_advs_norms = ep.where(new_best, norms, best_advs_norms)

    return best_advs, best_advs_norms
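# Hypothetical toy inputs on the NumPy backend: the candidate's elastic-net
# norm is beta * ||d||_1 + ||d||_2^2 = 1.0 * 0.4 + 0.04 = 0.44, which beats
# the first incumbent (10.0) but not the second (0.01). All values below are
# made up for illustration.
import numpy as np
import eagerpy as ep

x = ep.astensor(np.zeros((2, 1, 2, 2), dtype=np.float32))
x_k = ep.astensor(np.full((2, 1, 2, 2), 0.1, dtype=np.float32))
best_advs = ep.astensor(np.ones((2, 1, 2, 2), dtype=np.float32))
best_advs_norms = ep.astensor(np.array([10.0, 0.01], dtype=np.float32))
found_advs = ep.astensor(np.array([True, True]))

best_advs, best_advs_norms = _apply_decision_rule(
    "EN", 1.0, best_advs, best_advs_norms, x_k, x, found_advs
)
print(best_advs_norms.numpy())  # [0.44 0.01]: only the first sample is replaced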
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    self.process_raw()
    assert self.inputs is not None
    assert self.outputs is not None
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)

    result = x
    found = criterion(x, model(x))

    batch_size = len(x)

    # for every sample, try every other sample
    index_pools: List[List[int]] = []
    for i in range(batch_size):
        indices = list(range(batch_size))
        indices.remove(i)
        np.random.shuffle(indices)
        index_pools.append(indices)

    for i in range(batch_size - 1):
        if found.all():
            break

        indices = np.array([pool[i] for pool in index_pools])

        xp = self.inputs[indices]
        yp = self.outputs[indices]
        is_adv = criterion(xp, yp)

        new_found = ep.logical_and(is_adv, found.logical_not())
        result = ep.where(atleast_kd(new_found, result.ndim), xp, result)
        found = ep.logical_or(found, new_found)

    return restore_type(result)
def _project(
    self, originals: ep.Tensor, perturbed: ep.Tensor, epsilons: ep.Tensor
) -> ep.Tensor:
    """Clips the perturbations to epsilon and returns the new perturbed inputs.

    Args:
        originals: A batch of reference inputs.
        perturbed: A batch of perturbed inputs.
        epsilons: A batch of norm values to project to.

    Returns:
        A tensor like perturbed but with the perturbation clipped to epsilon.
    """
    epsilons = atleast_kd(epsilons, originals.ndim)
    if self.constraint == "linf":
        perturbation = perturbed - originals

        # ep.clip does not support tensors as min/max
        clipped_perturbed = ep.where(
            perturbation > epsilons, originals + epsilons, perturbed
        )
        clipped_perturbed = ep.where(
            perturbation < -epsilons, originals - epsilons, clipped_perturbed
        )
        return clipped_perturbed
    else:
        return (1.0 - epsilons) * originals + epsilons * perturbed
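# The non-linf branch is plain linear interpolation between the original and
# the perturbed input, so epsilons = 0 returns the original and epsilons = 1
# the perturbed input. A toy check on the NumPy backend with a standalone
# helper mirroring that branch (the helper is illustrative, not part of the
# attack class):
import numpy as np
import eagerpy as ep

def _interp(originals, perturbed, epsilons):
    epsilons = atleast_kd(epsilons, originals.ndim)
    return (1.0 - epsilons) * originals + epsilons * perturbed

o = ep.astensor(np.zeros((1, 4), dtype=np.float32))
p = ep.astensor(np.ones((1, 4), dtype=np.float32))
eps = ep.astensor(np.array([0.25], dtype=np.float32))
print(_interp(o, p, eps).numpy())  # [[0.25 0.25 0.25 0.25]]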
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    lower_bound = ep.zeros(x, len(x))
    upper_bound = ep.ones(x, len(x))
    epsilons = lower_bound
    for _ in range(self.binary_search_steps):
        eps = atleast_kd(epsilons, x.ndim)
        is_adv = is_adversarial(x + eps * direction)
        lower_bound = ep.where(is_adv, lower_bound, epsilons)
        upper_bound = ep.where(is_adv, epsilons, upper_bound)
        epsilons = (lower_bound + upper_bound) / 2

    epsilons = upper_bound
    eps = atleast_kd(epsilons, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
def test_pointwise_targeted_attack(
    request: Any,
    fmodel_and_data_ext_for_attacks: ModeAndDataAndDescription,
    attack: fa.PointwiseAttack,
) -> None:
    (fmodel, x, y), real, low_dimensional_input = fmodel_and_data_ext_for_attacks

    if not low_dimensional_input or not real:
        pytest.skip()

    x = (x - fmodel.bounds.lower) / (fmodel.bounds.upper - fmodel.bounds.lower)
    fmodel = fmodel.transform_bounds((0, 1))

    init_attack = fa.SaltAndPepperNoiseAttack(steps=50)
    init_advs = init_attack.run(fmodel, x, y)

    logits = fmodel(init_advs)
    num_classes = logits.shape[-1]
    target_classes = logits.argmax(-1)
    target_classes = ep.where(
        target_classes == y, (target_classes + 1) % num_classes, target_classes
    )
    criterion = fbn.TargetedMisclassification(target_classes)

    advs = attack.run(fmodel, x, criterion, starting_points=init_advs)

    init_norms_l0 = ep.norms.lp(flatten(init_advs - x), p=0, axis=-1)
    norms_l0 = ep.norms.lp(flatten(advs - x), p=0, axis=-1)
    init_norms_l2 = ep.norms.lp(flatten(init_advs - x), p=2, axis=-1)
    norms_l2 = ep.norms.lp(flatten(advs - x), p=2, axis=-1)

    is_smaller_l0 = norms_l0 < init_norms_l0
    is_smaller_l2 = norms_l2 < init_norms_l2

    assert fbn.accuracy(fmodel, advs, y) < fbn.accuracy(fmodel, x, y)
    assert fbn.accuracy(fmodel, advs, y) <= fbn.accuracy(fmodel, init_advs, y)
    assert fbn.accuracy(fmodel, advs, target_classes) > fbn.accuracy(
        fmodel, x, target_classes
    )
    assert fbn.accuracy(fmodel, advs, target_classes) >= fbn.accuracy(
        fmodel, init_advs, target_classes
    )
    assert is_smaller_l2.any()
    assert is_smaller_l0.any()
def mid_points(
    self,
    x0: ep.Tensor,
    x1: ep.Tensor,
    epsilons: ep.Tensor,
    bounds: Tuple[float, float],
) -> ep.Tensor:
    # returns a point between x0 and x1 where
    # epsilon = 0 returns x0 and epsilon = 1 returns x1

    # get epsilons in the right shape for broadcasting
    epsilons = epsilons.reshape(epsilons.shape + (1,) * (x0.ndim - 1))

    threshold = (bounds[1] - bounds[0]) * (1 - epsilons)
    mask = (x1 - x0).abs() > threshold
    new_x = ep.where(
        mask, x0 + (x1 - x0).sign() * ((x1 - x0).abs() - threshold), x0
    )
    return new_x
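# The masked update above is element-wise soft thresholding: every coordinate
# of x1 - x0 is shrunk towards zero by threshold, and coordinates below the
# threshold vanish entirely. Equivalent NumPy sketch (values illustrative):
import numpy as np

x0 = np.zeros((1, 3), dtype=np.float32)
x1 = np.array([[0.2, -0.8, 1.0]], dtype=np.float32)
eps = 0.5
threshold = (1.0 - 0.0) * (1 - eps)  # bounds (0, 1)

delta = x1 - x0
shrunk = np.where(
    np.abs(delta) > threshold, np.sign(delta) * (np.abs(delta) - threshold), 0.0
)
print(x0 + shrunk)  # [[ 0.  -0.3  0.5]]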
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    best = ep.ones(x, len(x))

    epsilon = 0.0
    stepsize = 1.0 / self.steps
    for _ in range(self.steps):
        # TODO: reduce the batch size to the ones that have not yet been successful
        is_adv = is_adversarial(x + epsilon * direction)
        is_best_adv = ep.logical_and(is_adv, best == 1)
        best = ep.where(is_best_adv, epsilon, best)

        if (best < 1).all():
            break  # pragma: no cover
        epsilon += stepsize

    eps = atleast_kd(best, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
def __call__(
    self, inputs, labels, *, epsilon, criterion, repeats=100, check_trivial=True
):
    originals = ep.astensor(inputs)
    labels = ep.astensor(labels)

    def is_adversarial(p: ep.Tensor) -> ep.Tensor:
        """For each input in x, returns true if it is an adversarial
        for the given model and criterion"""
        logits = self.model.forward(p)
        return criterion(originals, labels, p, logits)

    x0 = ep.astensor(inputs)
    min_, max_ = self.model.bounds()

    result = x0
    if check_trivial:
        found = is_adversarial(result)
    else:
        found = ep.zeros(x0, len(result)).bool()

    for _ in range(repeats):
        if found.all():
            break

        p = self.sample_noise(x0)
        norms = self.get_norms(p)
        p = p / atleast_kd(norms, p.ndim)
        x = x0 + epsilon * p
        x = x.clip(min_, max_)

        is_adv = is_adversarial(x)
        is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
        result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
        found = ep.logical_or(found, is_adv)

    return result.tensor
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, Any] = None,
    *,
    epsilon: float,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x0, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    verify_input_bounds(x0, model)

    is_adversarial = get_is_adversarial(criterion_, model)
    min_, max_ = model.bounds

    result = x0
    if self.check_trivial:
        found = is_adversarial(result)
    else:
        found = ep.zeros(x0, len(result)).bool()

    for _ in range(self.repeats):
        if found.all():
            break

        p = self.sample_noise(x0)
        epsilons = self.get_epsilons(x0, p, epsilon, min_=min_, max_=max_)
        x = x0 + epsilons * p
        x = x.clip(min_, max_)

        is_adv = is_adversarial(x)
        is_new_adv = ep.logical_and(is_adv, ep.logical_not(found))
        result = ep.where(atleast_kd(is_new_adv, x.ndim), x, result)
        found = ep.logical_or(found, is_adv)

    return restore_type(result)
def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    del inputs
    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    best = self._attack(model, x, criterion)
    best_is_adv = is_adversarial(best)

    for _ in range(1, self._times):
        xp = self._attack(model, x, criterion)
        # assumes xp does not violate the perturbation size constraint

        is_adv = is_adversarial(xp)
        new_best = ep.logical_and(is_adv, best_is_adv.logical_not())
        best = ep.where(atleast_kd(new_best, best.ndim), xp, best)
        best_is_adv = ep.logical_or(is_adv, best_is_adv)

    return restore_type(best)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    self.process_raw()
    assert self.inputs is not None
    assert self.outputs is not None
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    criterion = get_criterion(criterion)

    result = x
    found = criterion(x, model(x))

    dataset_size = len(self.inputs)
    batch_size = len(x)

    while not found.all():
        indices = np.random.randint(0, dataset_size, size=(batch_size,))

        xp = self.inputs[indices]
        yp = self.outputs[indices]
        is_adv = criterion(xp, yp)

        new_found = ep.logical_and(is_adv, found.logical_not())
        result = ep.where(atleast_kd(new_found, result.ndim), xp, result)
        found = ep.logical_or(found, new_found)

    return restore_type(result)
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)

    min_, max_ = model.bounds

    logits = model(x)
    classes = logits.argsort(axis=-1).flip(axis=-1)
    if self.candidates is None:
        candidates = logits.shape[-1]  # pragma: no cover
    else:
        candidates = min(self.candidates, logits.shape[-1])
        if not candidates >= 2:
            raise ValueError(  # pragma: no cover
                f"expected the model output to have at least 2 classes, got {logits.shape[-1]}"
            )
        logging.info(f"Only testing the top-{candidates} classes")
        classes = classes[:, :candidates]

    N = len(x)
    rows = range(N)

    loss_fun = self._get_loss_fn(model, classes)
    loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

    x0 = x
    p_total = ep.zeros_like(x)
    for _ in range(self.steps):
        # let's first get the logits using k = 1 to see if we are done
        diffs = [loss_aux_and_grad(x, 1)]
        _, (_, logits), _ = diffs[0]

        is_adv = criterion(x, logits)
        if is_adv.all():
            break

        # then run all the other k's as well
        # we could avoid repeated forward passes and only repeat
        # the backward pass, but this cannot currently be done in eagerpy
        diffs += [loss_aux_and_grad(x, k) for k in range(2, candidates)]

        # we don't need the logits
        diffs_ = [(losses, grad) for _, (losses, _), grad in diffs]
        losses = ep.stack([lo for lo, _ in diffs_], axis=1)
        grads = ep.stack([g for _, g in diffs_], axis=1)
        assert losses.shape == (N, candidates - 1)
        assert grads.shape == (N, candidates - 1) + x0.shape[1:]

        # calculate the distances
        distances = self.get_distances(losses, grads)
        assert distances.shape == (N, candidates - 1)

        # determine the best directions
        best = distances.argmin(axis=1)
        distances = distances[rows, best]
        losses = losses[rows, best]
        grads = grads[rows, best]
        assert distances.shape == (N,)
        assert losses.shape == (N,)
        assert grads.shape == x0.shape

        # apply perturbation
        distances = distances + 1e-4  # for numerical stability
        p_step = self.get_perturbations(distances, grads)
        assert p_step.shape == x0.shape

        p_total += p_step
        # don't do anything for those that are already adversarial
        x = ep.where(
            atleast_kd(is_adv, x.ndim), x, x0 + (1.0 + self.overshoot) * p_total
        )
        x = ep.clip(x, min_, max_)

    return restore_type(x)
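# For the L2 variant, get_distances and get_perturbations correspond to the
# classic DeepFool step d_k = |f_k| / ||grad f_k||_2 followed by a move of
# length d along the normalized gradient. Toy NumPy version of one such step
# on a linear binary classifier (all values illustrative):
import numpy as np

w, b = np.array([2.0, -1.0]), 0.5
x = np.array([1.0, 1.0])
f = w @ x + b                                # current margin
p = -(abs(f) / (w @ w)) * np.sign(f) * w     # minimal L2 step to the boundary
print(w @ (x + p) + b)                       # ~0.0: lands on the decision boundary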
def __call__(
    self,
    inputs,
    labels,
    *,
    starting_points=None,
    init_attack=None,
    criterion: Callable = misclassification,
    steps=25000,
    spherical_step=1e-2,
    source_step=1e-2,
    source_step_convergance=1e-7,
    step_adaptation=1.5,
    tensorboard=False,
    update_stats_every_k=10,
):
    """Boundary Attack

    Differences to the original reference implementation:
    * We do not perform internal operations with float64
    * The samples within a batch can currently influence each other a bit
    * We don't perform the additional convergence confirmation
    * The success rate tracking changed a bit
    * Some other changes due to batching and merged loops

    Parameters
    ----------
    criterion : Callable
        A callable that returns true if the given logits of perturbed
        inputs should be considered adversarial w.r.t. to the given labels
        and unperturbed inputs.
    tensorboard : str
        The log directory for TensorBoard summaries. If False, TensorBoard
        summaries will be disabled (default). If None, the logdir will be
        runs/CURRENT_DATETIME_HOSTNAME.
    """
    tb = TensorBoard(logdir=tensorboard)

    originals = ep.astensor(inputs)
    labels = ep.astensor(labels)

    def is_adversarial(p: ep.Tensor) -> ep.Tensor:
        """For each input in x, returns true if it is an adversarial
        for the given model and criterion"""
        logits = self.model.forward(p)
        return criterion(originals, labels, p, logits)

    if starting_points is None:
        if init_attack is None:
            init_attack = LinearSearchBlendedUniformNoiseAttack
            logging.info(
                f"Neither starting_points nor init_attack given. Falling"
                f" back to {init_attack.__name__} for initialization."
            )
        starting_points = init_attack(self.model)(inputs, labels)

    best_advs = ep.astensor(starting_points)
    assert is_adversarial(best_advs).all()

    N = len(originals)
    ndim = originals.ndim
    spherical_steps = ep.ones(originals, N) * spherical_step
    source_steps = ep.ones(originals, N) * source_step

    tb.scalar("batchsize", N, 0)

    # create two queues for each sample to track success rates
    # (used to update the hyperparameters)
    stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
    stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

    bounds = self.model.bounds()

    for step in range(1, steps + 1):
        converged = source_steps < source_step_convergance
        if converged.all():
            break
        converged = atleast_kd(converged, ndim)

        # TODO: performance: ignore those that have converged
        # (we could select the non-converged ones, but we currently
        # cannot easily invert this in the end using EagerPy)

        unnormalized_source_directions = originals - best_advs
        source_norms = l2norms(unnormalized_source_directions)
        source_directions = unnormalized_source_directions / atleast_kd(
            source_norms, ndim
        )

        # only check spherical candidates every k steps
        check_spherical_and_update_stats = step % update_stats_every_k == 0

        candidates, spherical_candidates = draw_proposals(
            bounds,
            originals,
            best_advs,
            unnormalized_source_directions,
            source_directions,
            source_norms,
            spherical_steps,
            source_steps,
        )
        assert candidates.dtype == originals.dtype
        assert spherical_candidates.dtype == originals.dtype

        is_adv = is_adversarial(candidates)

        if check_spherical_and_update_stats:
            spherical_is_adv = is_adversarial(spherical_candidates)
            stats_spherical_adversarial.append(spherical_is_adv)
            # TODO: algorithm: the original implementation ignores those samples
            # for which spherical is not adversarial and continues with the
            # next iteration -> we estimate different probabilities
            # (conditional vs. unconditional)
            # TODO: thoughts: should we always track this because we compute it anyway
            stats_step_adversarial.append(is_adv)
        else:
            spherical_is_adv = None

        # in theory, we are closer per construction
        # but limited numerical precision might break this
        distances = l2norms(originals - candidates)
        closer = distances < source_norms
        is_best_adv = ep.logical_and(is_adv, closer)
        is_best_adv = atleast_kd(is_best_adv, ndim)

        cond = converged.logical_not().logical_and(is_best_adv)
        best_advs = ep.where(cond, candidates, best_advs)

        tb.probability("converged", converged, step)
        tb.scalar("updated_stats", check_spherical_and_update_stats, step)
        tb.histogram("norms", source_norms, step)
        tb.probability("is_adv", is_adv, step)
        if spherical_is_adv is not None:
            tb.probability("spherical_is_adv", spherical_is_adv, step)
        tb.histogram("candidates/distances", distances, step)
        tb.probability("candidates/closer", closer, step)
        tb.probability("candidates/is_best_adv", is_best_adv, step)
        tb.probability("new_best_adv_including_converged", is_best_adv, step)
        tb.probability("new_best_adv", cond, step)

        if check_spherical_and_update_stats:
            full = stats_spherical_adversarial.isfull()
            tb.probability("spherical_stats/full", full, step)
            if full.any():
                probs = stats_spherical_adversarial.mean()
                cond1 = ep.logical_and(probs > 0.5, full)
                spherical_steps = ep.where(
                    cond1, spherical_steps * step_adaptation, spherical_steps
                )
                source_steps = ep.where(
                    cond1, source_steps * step_adaptation, source_steps
                )
                cond2 = ep.logical_and(probs < 0.2, full)
                spherical_steps = ep.where(
                    cond2, spherical_steps / step_adaptation, spherical_steps
                )
                source_steps = ep.where(
                    cond2, source_steps / step_adaptation, source_steps
                )
                stats_spherical_adversarial.clear(ep.logical_or(cond1, cond2))
                tb.conditional_mean(
                    "spherical_stats/isfull/success_rate/mean", probs, full, step
                )
                tb.probability_ratio(
                    "spherical_stats/isfull/too_linear", cond1, full, step
                )
                tb.probability_ratio(
                    "spherical_stats/isfull/too_nonlinear", cond2, full, step
                )

            full = stats_step_adversarial.isfull()
            tb.probability("step_stats/full", full, step)
            if full.any():
                probs = stats_step_adversarial.mean()
                # TODO: algorithm: changed the two values because we are
                # currently tracking p(source_step_success) instead of
                # p(source_step_success | spherical_step_success) that was
                # tracked before
                cond1 = ep.logical_and(probs > 0.25, full)
                source_steps = ep.where(
                    cond1, source_steps * step_adaptation, source_steps
                )
                cond2 = ep.logical_and(probs < 0.1, full)
                source_steps = ep.where(
                    cond2, source_steps / step_adaptation, source_steps
                )
                stats_step_adversarial.clear(ep.logical_or(cond1, cond2))
                tb.conditional_mean(
                    "step_stats/isfull/success_rate/mean", probs, full, step
                )
                tb.probability_ratio(
                    "step_stats/isfull/success_rate_too_high", cond1, full, step
                )
                tb.probability_ratio(
                    "step_stats/isfull/success_rate_too_low", cond2, full, step
                )

        tb.histogram("spherical_step", spherical_steps, step)
        tb.histogram("source_step", source_steps, step)

    tb.close()
    return best_advs.tensor