def approximate_gradients(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    x_advs: ep.Tensor,
    steps: int,
    delta: ep.Tensor,
) -> ep.Tensor:
    # (steps, bs, ...)
    noise_shape = tuple([steps] + list(x_advs.shape))
    if self.constraint == "l2":
        rv = ep.normal(x_advs, noise_shape)
    elif self.constraint == "linf":
        rv = ep.uniform(x_advs, low=-1, high=1, shape=noise_shape)
    rv /= atleast_kd(ep.norms.l2(flatten(rv, keep=1), -1), rv.ndim) + 1e-12

    scaled_rv = atleast_kd(ep.expand_dims(delta, 0), rv.ndim) * rv

    perturbed = ep.expand_dims(x_advs, 0) + scaled_rv
    perturbed = ep.clip(perturbed, 0, 1)

    rv = (perturbed - x_advs) / atleast_kd(ep.expand_dims(delta + 1e-8, 0), rv.ndim)

    multipliers_list: List[ep.Tensor] = []
    for step in range(steps):
        decision = is_adversarial(perturbed[step])
        multipliers_list.append(
            ep.where(
                decision,
                ep.ones(x_advs, (len(x_advs),)),
                -ep.ones(x_advs, (len(decision),)),
            )
        )
    # (steps, bs, ...)
    multipliers = ep.stack(multipliers_list, 0)

    vals = ep.where(
        ep.abs(ep.mean(multipliers, axis=0, keepdims=True)) == 1,
        multipliers,
        multipliers - ep.mean(multipliers, axis=0, keepdims=True),
    )
    grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

    grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

    return grad
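# The estimator above averages signed random directions around the current
# boundary point (HopSkipJump-style Monte Carlo gradient estimation). A
# simplified NumPy sketch of the same idea, with a hypothetical
# `is_adversarial` oracle, a single (unbatched) input, and an L2 constraint:
import numpy as np

def approximate_gradient_np(is_adversarial, x_adv, steps, delta):
    # sample random directions and normalize them to unit L2 norm
    rv = np.random.normal(size=(steps,) + x_adv.shape).astype(np.float32)
    norms = np.sqrt((rv.reshape(steps, -1) ** 2).sum(axis=1))
    rv /= norms.reshape((steps,) + (1,) * x_adv.ndim) + 1e-12

    # probe points at distance delta around x_adv, clipped to the valid range
    perturbed = np.clip(x_adv[None] + delta * rv, 0, 1)
    rv = (perturbed - x_adv[None]) / (delta + 1e-8)

    # +1 if the probe is still adversarial, -1 otherwise
    signs = np.array([1.0 if is_adversarial(p) else -1.0 for p in perturbed],
                     dtype=np.float32)
    # subtract the mean sign as a baseline, unless all probes agree
    if abs(signs.mean()) != 1:
        signs = signs - signs.mean()

    grad = (signs.reshape((steps,) + (1,) * x_adv.ndim) * rv).mean(axis=0)
    return grad / (np.linalg.norm(grad) + 1e-12)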
def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
    x, restore_type = ep.astensor_(inputs)
    del inputs
    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    best = ep.ones(x, len(x))

    epsilon = 0.0
    stepsize = 1.0 / self.steps
    for _ in range(self.steps):
        # TODO: reduce the batch size to the ones that have not yet been successful
        is_adv = is_adversarial(x + epsilon * direction)
        is_best_adv = ep.logical_and(is_adv, best == 1)
        best = ep.where(is_best_adv, epsilon, best)

        if (best < 1).all():
            break
        epsilon += stepsize

    eps = atleast_kd(best, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
def transform_bounds(self: ModelType, bounds: BoundsInput, inplace: bool = False) -> ModelType: """Returns a new model with the desired bounds and updates the preprocessing accordingly""" # more efficient than the base class implementation because it avoids the additional wrapper if self.bounds == bounds: if inplace: return self else: return copy.copy(self) a, b = self.bounds c, d = bounds f = (d - c) / (b - a) mean, std, flip_axis = self._preprocess_args if mean is None: mean = ep.zeros(self._dummy, 1) mean = f * (mean - a) + c if std is None: std = ep.ones(self._dummy, 1) std = f * std if inplace: model = self else: model = copy.copy(self) model._bounds = Bounds(*bounds) model._preprocess_args = (mean, std, flip_axis) return model
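# transform_bounds rescales the stored preprocessing so that, for example, a
# model trained on [0, 255] inputs can be queried with [0, 1] inputs. A minimal,
# self-contained usage sketch with a tiny placeholder network:
import torch
import foolbox as fb

net = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10)).eval()
fmodel = fb.PyTorchModel(net, bounds=(0, 255))
fmodel01 = fmodel.transform_bounds((0, 1))  # new wrapper with rescaled preprocessing

assert fmodel.bounds == (0, 255)
assert fmodel01.bounds == (0, 1)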
def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T: x, restore_type = ep.astensor_(inputs) del inputs criterion = get_criterion(criterion) is_adversarial = get_is_adversarial(criterion, model) min_, max_ = model.bounds target = min_ + self.target * (max_ - min_) direction = target - x lower_bound = ep.zeros(x, len(x)) upper_bound = ep.ones(x, len(x)) epsilons = lower_bound for _ in range(self.binary_search_steps): eps = atleast_kd(epsilons, x.ndim) is_adv = is_adversarial(x + eps * direction) lower_bound = ep.where(is_adv, lower_bound, epsilons) upper_bound = ep.where(is_adv, epsilons, upper_bound) epsilons = (lower_bound + upper_bound) / 2 epsilons = upper_bound eps = atleast_kd(epsilons, x.ndim) xp = x + eps * direction return restore_type(xp)
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, starting_points: Optional[ep.Tensor] = None, **kwargs: Any, ) -> T: originals, restore_type = ep.astensor_(inputs) self._nqueries = {i: 0 for i in range(len(originals))} self._set_cos_sin_function(originals) self.theta_max = ep.ones(originals, len(originals)) * self._theta_max criterion = get_criterion(criterion) self._criterion_is_adversarial = get_is_adversarial(criterion, model) # Get Starting Point if starting_points is not None: best_advs = starting_points elif starting_points is None: init_attack: MinimizationAttack = LinearSearchBlendedUniformNoiseAttack(steps=50) best_advs = init_attack.run(model, originals, criterion, early_stop=early_stop) else: raise ValueError("starting_points {} doesn't exist.".format(starting_points)) assert self._is_adversarial(best_advs).all() # Initialize the direction orthogonalized with the first direction fd = best_advs - originals norm = ep.norms.l2(fd.flatten(1), axis=1) fd = fd / atleast_kd(norm, fd.ndim) self._directions_ortho = {i: v.expand_dims(0) for i, v in enumerate(fd)} # Load Basis if "basis_params" in kwargs: self._basis = Basis(originals, **kwargs["basis_params"]) else: self._basis = Basis(originals) for _ in range(self._steps): # Get candidates. Shape: (n_candidates, batch_size, image_size) candidates = self._get_candidates(originals, best_advs) candidates = candidates.transpose((1, 0, 2, 3, 4)) best_candidates = ep.zeros_like(best_advs).raw for i, o in enumerate(originals): o_repeated = ep.concatenate([o.expand_dims(0)] * len(candidates[i]), axis=0) index = ep.argmax(self.distance(o_repeated, candidates[i])).raw best_candidates[i] = candidates[i][index].raw is_success = self.distance(best_candidates, originals) < self.distance(best_advs, originals) best_advs = ep.where(atleast_kd(is_success, best_candidates.ndim), ep.astensor(best_candidates), best_advs) if all(v > self._max_queries for v in self._nqueries.values()): print("Max queries attained for all the images.") break return restore_type(best_advs)
def _binary_search(self, originals: ep.Tensor, perturbed: ep.Tensor, boost: Optional[bool] = False) -> ep.Tensor:
    # Choose upper thresholds in binary search based on constraint.
    highs = ep.ones(perturbed, len(perturbed))
    d = np.prod(perturbed.shape[1:])
    thresholds = self._BS_gamma / (d * math.sqrt(d))
    lows = ep.zeros_like(highs)

    # Boosted binary search
    if boost:
        boost_vec = 0.1 * originals + 0.9 * perturbed
        is_advs = self._is_adversarial(boost_vec)
        is_advs = atleast_kd(is_advs, originals.ndim)
        originals = ep.where(is_advs.logical_not(), boost_vec, originals)
        perturbed = ep.where(is_advs, boost_vec, perturbed)

    # use this variable to check when mids stays constant and the BS has converged
    old_mids = highs
    iteration = 0
    while ep.any(highs - lows > thresholds) and iteration < self._BS_max_iteration:
        iteration += 1
        mids = (lows + highs) / 2
        mids_perturbed = self._project(originals, perturbed, mids)
        is_adversarial_ = self._is_adversarial(mids_perturbed)

        highs = ep.where(is_adversarial_, mids, highs)
        lows = ep.where(is_adversarial_, lows, mids)

        # check if there is no more progress due to numerical imprecision
        reached_numerical_precision = (old_mids == mids).all()
        old_mids = mids
        if reached_numerical_precision:
            break

    results = self._project(originals, perturbed, highs)
    return results
def test_flatten(dummy: Tensor) -> None: t = ep.ones(dummy, (16, 3, 32, 32)) assert ep.flatten(t).shape == (16 * 3 * 32 * 32, ) assert ep.flatten(t, start=1).shape == (16, 3 * 32 * 32) assert ep.flatten(t, start=2).shape == (16, 3, 32 * 32) assert ep.flatten(t, start=3).shape == (16, 3, 32, 32) assert ep.flatten(t, end=-2).shape == (16 * 3 * 32, 32) assert ep.flatten(t, end=-3).shape == (16 * 3, 32, 32) assert ep.flatten(t, end=-4).shape == (16, 3, 32, 32) assert ep.flatten(t, start=1, end=-2).shape == (16, 3 * 32, 32)
def test_tensorboard(logdir: Union[Literal[False], None, str], tmp_path: Any, dummy: ep.Tensor) -> None: if logdir == "temp": logdir = tmp_path if logdir: before = len(list(tmp_path.iterdir())) tb = fbn.tensorboard.TensorBoard(logdir) tb.scalar("a_scalar", 5, step=1) x = ep.ones(dummy, 10) tb.mean("a_mean", x, step=2) x = ep.ones(dummy, 10) == ep.arange(dummy, 10) tb.probability("a_probability", x, step=2) x = ep.arange(dummy, 10).float32() cond = ep.ones(dummy, 10) == (ep.arange(dummy, 10) % 2) tb.conditional_mean("a_conditional_mean", x, cond, step=2) x = ep.arange(dummy, 10).float32() cond = ep.ones(dummy, 10) == ep.zeros(dummy, 10) tb.conditional_mean("a_conditional_mean_false", x, cond, step=2) x = ep.ones(dummy, 10) == ep.arange(dummy, 10) y = ep.ones(dummy, 10) == (ep.arange(dummy, 10) % 2) tb.probability_ratio("a_probability_ratio", x, y, step=5) x = ep.ones(dummy, 10) == (ep.arange(dummy, 10) % 2) y = ep.ones(dummy, 10) == ep.zeros(dummy, 10) tb.probability_ratio("a_probability_ratio_y_zero", x, y, step=5) x = ep.arange(dummy, 10).float32() tb.histogram("a_histogram", x, step=9, first=False) tb.histogram("a_histogram", x, step=10, first=True) tb.close() if logdir: after = len(list(tmp_path.iterdir())) assert after > before # make sure something has been written
def _binary_search(
    self,
    is_adversarial: Callable[[ep.Tensor], ep.Tensor],
    originals: ep.Tensor,
    perturbed: ep.Tensor,
) -> ep.Tensor:
    # Choose upper thresholds in binary search based on constraint.
    d = np.prod(perturbed.shape[1:])
    if self.constraint == "linf":
        highs = linf(originals, perturbed)
        # TODO: Check if the threshold is correct
        #  empirically this seems to be too low
        thresholds = highs * self.gamma / (d * d)
    else:
        highs = ep.ones(perturbed, len(perturbed))
        thresholds = self.gamma / (d * math.sqrt(d))
    lows = ep.zeros_like(highs)

    # use this variable to check when mids stays constant and the BS has converged
    old_mids = highs
    while ep.any(highs - lows > thresholds):
        mids = (lows + highs) / 2
        mids_perturbed = self._project(originals, perturbed, mids)
        is_adversarial_ = is_adversarial(mids_perturbed)

        highs = ep.where(is_adversarial_, mids, highs)
        lows = ep.where(is_adversarial_, lows, mids)

        # check if there is no more progress due to numerical imprecision
        reached_numerical_precision = (old_mids == mids).all()
        old_mids = mids
        if reached_numerical_precision:
            # TODO: warn user
            break

    res = self._project(originals, perturbed, highs)
    return res
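# The `_project(originals, perturbed, epsilons)` helper used by the binary
# search above is not shown in this listing. A sketch of how such a projection
# is commonly implemented (linear interpolation for the L2 constraint, per-pixel
# clipping for Linf), written as a free function here for illustration:
import eagerpy as ep
from foolbox.devutils import atleast_kd

def project_onto_segment(originals: ep.Tensor, perturbed: ep.Tensor,
                         epsilons: ep.Tensor, constraint: str = "l2") -> ep.Tensor:
    epsilons = atleast_kd(epsilons, originals.ndim)
    if constraint == "linf":
        # clip the perturbation into an epsilon-sized box around the originals
        perturbation = perturbed - originals
        clipped = ep.where(perturbation > epsilons, originals + epsilons, perturbed)
        return ep.where(perturbation < -epsilons, originals - epsilons, clipped)
    # L2: blend linearly between originals (epsilon=0) and perturbed (epsilon=1)
    return (1.0 - epsilons) * originals + epsilons * perturbed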
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    verify_input_bounds(x, model)

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    min_, max_ = model.bounds
    target = min_ + self.target * (max_ - min_)
    direction = target - x

    best = ep.ones(x, len(x))

    epsilon = 0.0
    stepsize = 1.0 / self.steps
    for _ in range(self.steps):
        # TODO: reduce the batch size to the ones that have not yet been successful
        is_adv = is_adversarial(x + epsilon * direction)
        is_best_adv = ep.logical_and(is_adv, best == 1)
        best = ep.where(is_best_adv, epsilon, best)

        if (best < 1).all():
            break  # pragma: no cover
        epsilon += stepsize

    eps = atleast_kd(best, x.ndim)
    xp = x + eps * direction
    return restore_type(xp)
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) x, restore_type = ep.astensor_(inputs) del inputs, kwargs verify_input_bounds(x, model) criterion = get_criterion(criterion) is_adversarial = get_is_adversarial(criterion, model) min_, max_ = model.bounds target = min_ + self.target * (max_ - min_) direction = target - x lower_bound = ep.zeros(x, len(x)) upper_bound = ep.ones(x, len(x)) epsilons = lower_bound for _ in range(self.binary_search_steps): eps = atleast_kd(epsilons, x.ndim) is_adv = is_adversarial(x + eps * direction) lower_bound = ep.where(is_adv, lower_bound, epsilons) upper_bound = ep.where(is_adv, epsilons, upper_bound) epsilons = (lower_bound + upper_bound) / 2 epsilons = upper_bound eps = atleast_kd(epsilons, x.ndim) xp = x + eps * direction return restore_type(xp)
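# Both contrast-reduction searches (the linear sweep above and this binary
# search) are exposed as foolbox attacks. A minimal, self-contained usage
# sketch with a tiny random model and random data, just to show the calling
# convention (keyword names assumed to match recent foolbox releases):
import torch
import foolbox as fb

net = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10)).eval()
fmodel = fb.PyTorchModel(net, bounds=(0, 1))
images = torch.rand(8, 3, 32, 32)
labels = fmodel(images).argmax(-1)

attack = fb.attacks.BinarySearchContrastReductionAttack(binary_search_steps=15)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))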
def test_index_update_indices_scalar(dummy: Tensor) -> Tensor: x = ep.ones(dummy, (3, 4)) ind = ep.from_numpy(dummy, np.array([0, 1, 2, 1])) return ep.index_update(x, ep.index[ind, ep.arange(x, 4)], 33.0)
def __call__( self, inputs, labels, *, starting_points=None, init_attack=None, criterion: Callable = misclassification, steps=25000, spherical_step=1e-2, source_step=1e-2, source_step_convergance=1e-7, step_adaptation=1.5, tensorboard=False, update_stats_every_k=10, ): """Boundary Attack Differences to the original reference implementation: * We do not perform internal operations with float64 * The samples within a batch can currently influence each other a bit * We don't perform the additional convergence confirmation * The success rate tracking changed a bit * Some other changes due to batching and merged loops Parameters ---------- criterion : Callable A callable that returns true if the given logits of perturbed inputs should be considered adversarial w.r.t. to the given labels and unperturbed inputs. tensorboard : str The log directory for TensorBoard summaries. If False, TensorBoard summaries will be disabled (default). If None, the logdir will be runs/CURRENT_DATETIME_HOSTNAME. """ tb = TensorBoard(logdir=tensorboard) originals = ep.astensor(inputs) labels = ep.astensor(labels) def is_adversarial(p: ep.Tensor) -> ep.Tensor: """For each input in x, returns true if it is an adversarial for the given model and criterion""" logits = self.model.forward(p) return criterion(originals, labels, p, logits) if starting_points is None: if init_attack is None: init_attack = LinearSearchBlendedUniformNoiseAttack logging.info( f"Neither starting_points nor init_attack given. Falling" f" back to {init_attack.__name__} for initialization.") starting_points = init_attack(self.model)(inputs, labels) best_advs = ep.astensor(starting_points) assert is_adversarial(best_advs).all() N = len(originals) ndim = originals.ndim spherical_steps = ep.ones(originals, N) * spherical_step source_steps = ep.ones(originals, N) * source_step tb.scalar("batchsize", N, 0) # create two queues for each sample to track success rates # (used to update the hyper parameters) stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N) stats_step_adversarial = ArrayQueue(maxlen=30, N=N) bounds = self.model.bounds() for step in range(1, steps + 1): converged = source_steps < source_step_convergance if converged.all(): break converged = atleast_kd(converged, ndim) # TODO: performance: ignore those that have converged # (we could select the non-converged ones, but we currently # cannot easily invert this in the end using EagerPy) unnormalized_source_directions = originals - best_advs source_norms = l2norms(unnormalized_source_directions) source_directions = unnormalized_source_directions / atleast_kd( source_norms, ndim) # only check spherical candidates every k steps check_spherical_and_update_stats = step % update_stats_every_k == 0 candidates, spherical_candidates = draw_proposals( bounds, originals, best_advs, unnormalized_source_directions, source_directions, source_norms, spherical_steps, source_steps, ) candidates.dtype == originals.dtype spherical_candidates.dtype == spherical_candidates.dtype is_adv = is_adversarial(candidates) if check_spherical_and_update_stats: spherical_is_adv = is_adversarial(spherical_candidates) stats_spherical_adversarial.append(spherical_is_adv) # TODO: algorithm: the original implementation ignores those samples # for which spherical is not adversarial and continues with the # next iteration -> we estimate different probabilities (conditional vs. 
unconditional) # TODO: thoughts: should we always track this because we compute it anyway stats_step_adversarial.append(is_adv) else: spherical_is_adv = None # in theory, we are closer per construction # but limited numerical precision might break this distances = l2norms(originals - candidates) closer = distances < source_norms is_best_adv = ep.logical_and(is_adv, closer) is_best_adv = atleast_kd(is_best_adv, ndim) cond = converged.logical_not().logical_and(is_best_adv) best_advs = ep.where(cond, candidates, best_advs) tb.probability("converged", converged, step) tb.scalar("updated_stats", check_spherical_and_update_stats, step) tb.histogram("norms", source_norms, step) tb.probability("is_adv", is_adv, step) if spherical_is_adv is not None: tb.probability("spherical_is_adv", spherical_is_adv, step) tb.histogram("candidates/distances", distances, step) tb.probability("candidates/closer", closer, step) tb.probability("candidates/is_best_adv", is_best_adv, step) tb.probability("new_best_adv_including_converged", is_best_adv, step) tb.probability("new_best_adv", cond, step) if check_spherical_and_update_stats: full = stats_spherical_adversarial.isfull() tb.probability("spherical_stats/full", full, step) if full.any(): probs = stats_spherical_adversarial.mean() cond1 = ep.logical_and(probs > 0.5, full) spherical_steps = ep.where( cond1, spherical_steps * step_adaptation, spherical_steps) source_steps = ep.where(cond1, source_steps * step_adaptation, source_steps) cond2 = ep.logical_and(probs < 0.2, full) spherical_steps = ep.where( cond2, spherical_steps / step_adaptation, spherical_steps) source_steps = ep.where(cond2, source_steps / step_adaptation, source_steps) stats_spherical_adversarial.clear( ep.logical_or(cond1, cond2)) tb.conditional_mean( "spherical_stats/isfull/success_rate/mean", probs, full, step) tb.probability_ratio("spherical_stats/isfull/too_linear", cond1, full, step) tb.probability_ratio( "spherical_stats/isfull/too_nonlinear", cond2, full, step) full = stats_step_adversarial.isfull() tb.probability("step_stats/full", full, step) if full.any(): probs = stats_step_adversarial.mean() # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess) # instead of p(source_step_success | spherical_step_sucess) that was tracked before cond1 = ep.logical_and(probs > 0.25, full) source_steps = ep.where(cond1, source_steps * step_adaptation, source_steps) cond2 = ep.logical_and(probs < 0.1, full) source_steps = ep.where(cond2, source_steps / step_adaptation, source_steps) stats_step_adversarial.clear(ep.logical_or(cond1, cond2)) tb.conditional_mean("step_stats/isfull/success_rate/mean", probs, full, step) tb.probability_ratio( "step_stats/isfull/success_rate_too_high", cond1, full, step) tb.probability_ratio( "step_stats/isfull/success_rate_too_low", cond2, full, step) tb.histogram("spherical_step", spherical_steps, step) tb.histogram("source_step", source_steps, step) tb.close() return best_advs.tensor
def test_index_update_row(dummy: Tensor) -> Tensor: x = ep.ones(dummy, (3, 4)) return ep.index_update(x, ep.index[1], ep.ones(x, 4) * 66.0)
def test_index_update_column_scalar(dummy: Tensor) -> Tensor: x = ep.ones(dummy, (3, 4)) return ep.index_update(x, ep.index[:, 1], 66.0)
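# The index_update tests in this section (fancy indices, a full row, a column)
# correspond to the following NumPy updates, shown here on copies so the
# original array is left untouched:
import numpy as np

x = np.ones((3, 4))

# test_index_update_indices_scalar: positions (0,0), (1,1), (2,2), (1,3)
ind = np.array([0, 1, 2, 1])
a = x.copy()
a[ind, np.arange(4)] = 33.0

# test_index_update_row: the whole second row
b = x.copy()
b[1] = np.ones(4) * 66.0

# test_index_update_column_scalar: the whole second column
c = x.copy()
c[:, 1] = 66.0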
def test_ones_scalar(t: Tensor) -> Tensor: return ep.ones(t, 5)
def test_ones_tuple(t: Tensor) -> Tensor: return ep.ones(t, (2, 3))
def __call__( self, model: Model, inputs, labels, *, criterion=misclassification, channel_axis: Optional[int] = None, ): """ Parameters ---------- channel_axis The axis across which the noise should be the same (if across_channels is True). If None, will be automatically inferred from the model if possible. """ inputs, labels, restore = wrap(inputs, labels) is_adversarial = get_is_adversarial(criterion, inputs, labels, model) x0 = inputs N = len(x0) shape = list(x0.shape) if self.across_channels and x0.ndim > 2: if channel_axis is None and not hasattr(model, "data_format"): raise ValueError( "cannot infer the data_format from the model, please specify" " channel_axis when calling the attack") elif channel_axis is None: data_format = model.data_format # type: ignore if (data_format is None or data_format != "channels_first" and data_format != "channels_last"): raise ValueError( f"expected data_format to be 'channels_first' or 'channels_last'" ) channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1 elif not 0 <= channel_axis < x0.ndim: raise ValueError( f"expected channel_axis to be in [0, {x0.ndim})") shape[channel_axis] = 1 min_, max_ = model.bounds() r = max_ - min_ result = x0 is_adv = is_adversarial(result) best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf)) min_probability = ep.zeros(x0, N) max_probability = ep.ones(x0, N) stepsizes = max_probability / self.steps p = stepsizes for step in range(self.steps): # add salt and pepper u = ep.uniform(x0, shape) p_ = atleast_kd(p, x0.ndim) salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r pepper = -(u < p_ / 2).astype(x0.dtype) * r x = x0 + salt + pepper x = ep.clip(x, min_, max_) # check if we found new best adversarials norms = flatten(x).square().sum(axis=-1).sqrt() closer = norms < best_advs_norms is_adv = is_adversarial( x) # TODO: ignore those that are not closer anyway is_best_adv = ep.logical_and(is_adv, closer) # update results and search space result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result) best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms) min_probability = ep.where(is_best_adv, 0.5 * p, min_probability) # we set max_probability a bit higher than p because the relationship # between p and norms is not strictly monotonic max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability) remaining = self.steps - step stepsizes = ep.where( is_best_adv, (max_probability - min_probability) / remaining, stepsizes) reset = p == max_probability p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p) p = ep.minimum(p + stepsizes, max_probability) return restore(result)
def run( self, model: Model, inputs: T, criterion: Union[Criterion, Any] = None, *, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) x, restore_type = ep.astensor_(inputs) criterion_ = get_criterion(criterion) del inputs, criterion, kwargs is_adversarial = get_is_adversarial(criterion_, model) min_, max_ = model.bounds N = len(x) for j in range(self.directions): # random noise inputs tend to be classified into the same class, # so we might need to make very many draws if the original class # is that one random_ = ep.uniform(x, x.shape, min_, max_) is_adv_ = atleast_kd(is_adversarial(random_), x.ndim) if j == 0: random = random_ is_adv = is_adv_ else: random = ep.where(is_adv, random, random_) is_adv = is_adv.logical_or(is_adv_) if is_adv.all(): break if not is_adv.all(): warnings.warn( f"{self.__class__.__name__} failed to draw sufficient random" f" inputs that are adversarial ({is_adv.sum()} / {N}).") x0 = x epsilons = np.linspace(0, 1, num=self.steps + 1, dtype=np.float32) best = ep.ones(x, (N, )) for epsilon in epsilons: x = (1 - epsilon) * x0 + epsilon * random # TODO: due to limited floating point precision, clipping can be required is_adv = is_adversarial(x) epsilon = epsilon.item() best = ep.minimum(ep.where(is_adv, epsilon, 1.0), best) if (best < 1).all(): break best = atleast_kd(best, x0.ndim) x = (1 - best) * x0 + best * random return restore_type(x)
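# The core of the search above, sketched in NumPy with a hypothetical
# `is_adversarial` oracle and a single input: blend the input with an already
# adversarial noise image and take the smallest blending factor that is still
# adversarial.
import numpy as np

def linear_search_blend(is_adversarial, x0, noise, steps=100):
    for epsilon in np.linspace(0.0, 1.0, num=steps + 1):
        x = (1.0 - epsilon) * x0 + epsilon * noise
        if is_adversarial(x):
            return x, float(epsilon)
    return noise, 1.0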
def run( self, model: Model, inputs: T, criterion: Union[Misclassification, TargetedMisclassification, T], *, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) x, restore_type = ep.astensor_(inputs) criterion_ = get_criterion(criterion) del inputs, criterion, kwargs N = len(x) if isinstance(criterion_, Misclassification): targeted = False classes = criterion_.labels elif isinstance(criterion_, TargetedMisclassification): targeted = True classes = criterion_.target_classes else: raise ValueError("unsupported criterion") if classes.shape != (N, ): name = "target_classes" if targeted else "labels" raise ValueError( f"expected {name} to have shape ({N},), got {classes.shape}") stepsize = 1.0 min_, max_ = model.bounds def loss_fn(inputs: ep.Tensor, labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]: logits = model(inputs) sign = -1.0 if targeted else 1.0 loss = sign * ep.crossentropy(logits, labels).sum() return loss, logits grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True) delta = ep.zeros_like(x) epsilon = self.init_epsilon * ep.ones(x, len(x)) worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1) best_l2 = worst_norm best_delta = delta adv_found = ep.zeros(x, len(x)).bool() for i in range(self.steps): # perform cosine annealing of LR starting from 1.0 to 0.01 stepsize = (0.01 + (stepsize - 0.01) * (1 + math.cos(math.pi * i / self.steps)) / 2) x_adv = x + delta _, logits, gradients = grad_and_logits(x_adv, classes) gradients = normalize_gradient_l2_norms(gradients) is_adversarial = criterion_(x_adv, logits) l2 = ep.norms.l2(flatten(delta), axis=-1) is_smaller = l2 <= best_l2 is_both = ep.logical_and(is_adversarial, is_smaller) adv_found = ep.logical_or(adv_found, is_adversarial) best_l2 = ep.where(is_both, l2, best_l2) best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta) # do step delta = delta + stepsize * gradients epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma, 1.0 + self.gamma) epsilon = ep.minimum(epsilon, worst_norm) # project to epsilon ball delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1), x.ndim) # clip to valid bounds delta = ep.clip(x + delta, *model.bounds) - x x_adv = x + best_delta return restore_type(x_adv)
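# In the DDN-style update above, after each gradient step the perturbation is
# rescaled so that its L2 norm equals the current epsilon. A NumPy sketch of
# that rescaling for a single example:
import numpy as np

def rescale_onto_l2_sphere(delta, epsilon):
    # scale delta so that ||delta||_2 == epsilon
    return delta * (epsilon / (np.linalg.norm(delta) + 1e-12))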
def __call__( self, model: Model, inputs: T, criterion: Union[Misclassification, TargetedMisclassification, T], ) -> T: x, restore_type = ep.astensor_(inputs) criterion_ = get_criterion(criterion) del inputs, criterion N = len(x) if isinstance(criterion_, Misclassification): targeted = False classes = criterion_.labels change_classes_logits = self.confidence elif isinstance(criterion_, TargetedMisclassification): targeted = True classes = criterion_.target_classes change_classes_logits = -self.confidence else: raise ValueError("unsupported criterion") def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor: if change_classes_logits != 0: logits += ep.onehot_like(logits, classes, value=change_classes_logits) return criterion_(perturbed, logits) if classes.shape != (N,): name = "target_classes" if targeted else "labels" raise ValueError( f"expected {name} to have shape ({N},), got {classes.shape}" ) min_, max_ = model.bounds rows = range(N) def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]: assert y_k.shape == x.shape assert consts.shape == (N,) logits = model(y_k) if targeted: c_minimize = best_other_classes(logits, classes) c_maximize = classes else: c_minimize = classes c_maximize = best_other_classes(logits, classes) is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize] assert is_adv_loss.shape == (N,) is_adv_loss = is_adv_loss + self.confidence is_adv_loss = ep.maximum(0, is_adv_loss) is_adv_loss = is_adv_loss * consts squared_norms = flatten(y_k - x).square().sum(axis=-1) loss = is_adv_loss.sum() + squared_norms.sum() return loss, logits loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True) consts = self.initial_const * ep.ones(x, (N,)) lower_bounds = ep.zeros(x, (N,)) upper_bounds = ep.inf * ep.ones(x, (N,)) best_advs = ep.zeros_like(x) best_advs_norms = ep.ones(x, (N,)) * ep.inf # the binary search searches for the smallest consts that produce adversarials for binary_search_step in range(self.binary_search_steps): if ( binary_search_step == self.binary_search_steps - 1 and self.binary_search_steps >= 10 ): # in the last iteration, repeat the search once consts = ep.minimum(upper_bounds, 1e10) # create a new optimizer find the delta that minimizes the loss x_k = x y_k = x found_advs = ep.full( x, (N,), value=False ).bool() # found adv with the current consts loss_at_previous_check = ep.ones(x, (1,)) * ep.inf for iteration in range(self.steps): # square-root learning rate decay stepsize = self.initial_stepsize * (1.0 - iteration / self.steps) ** 0.5 loss, logits, gradient = loss_aux_and_grad(y_k, consts) x_k_old = x_k x_k = project_shrinkage_thresholding( y_k - stepsize * gradient, x, self.regularization, min_, max_ ) y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old) if self.abort_early and iteration % (math.ceil(self.steps / 10)) == 0: # after each tenth of the iterations, check progress # TODO: loss is a scalar ep tensor. is this the bst way to # implement the condition? 
if not ep.all(loss <= 0.9999 * loss_at_previous_check): break # stop optimization if there has been no progress loss_at_previous_check = loss found_advs_iter = is_adversarial(x_k, logits) best_advs, best_advs_norms = apply_decision_rule( self.decision_rule, self.regularization, best_advs, best_advs_norms, x_k, x, found_advs_iter, ) found_advs = ep.logical_or(found_advs, found_advs_iter) upper_bounds = ep.where(found_advs, consts, upper_bounds) lower_bounds = ep.where(found_advs, lower_bounds, consts) consts_exponential_search = consts * 10 consts_binary_search = (lower_bounds + upper_bounds) / 2 consts = ep.where( ep.isinf(upper_bounds), consts_exponential_search, consts_binary_search ) return restore_type(best_advs)
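# `project_shrinkage_thresholding` above is the standard ISTA soft-thresholding
# step with a box constraint: entries of y - stepsize * grad that stay within
# `regularization` of the original pixel are snapped back to it, larger changes
# are shrunk by `regularization` and clipped to the bounds. A NumPy sketch of
# that operation (not necessarily the exact helper used here):
import numpy as np

def shrinkage_thresholding(z, x0, beta, min_, max_):
    diff = z - x0
    out = np.where(diff > beta, np.minimum(z - beta, max_), x0)
    out = np.where(diff < -beta, np.maximum(z + beta, min_), out)
    return out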
def __call__( self, inputs, labels, *, target_classes=None, binary_search_steps=9, max_iterations=10000, confidence=0, learning_rate=1e-2, initial_const=1e-3, abort_early=True, ): x = ep.astensor(inputs) N = len(x) targeted = target_classes is not None if targeted: labels = None target_classes = ep.astensor(target_classes) assert target_classes.shape == (N, ) is_adv = partial(targeted_is_adv, target_classes=target_classes, confidence=confidence) else: labels = ep.astensor(labels) assert labels.shape == (N, ) is_adv = partial(untargeted_is_adv, labels=labels, confidence=confidence) bounds = self.model.bounds() to_attack_space = partial(_to_attack_space, bounds=bounds) to_model_space = partial(_to_model_space, bounds=bounds) x_attack = to_attack_space(x) reconstsructed_x = to_model_space(x_attack) rows = np.arange(N) def loss_fun(delta: ep.Tensor, consts: ep.Tensor) -> ep.Tensor: assert delta.shape == x_attack.shape assert consts.shape == (N, ) x = to_model_space(x_attack + delta) logits = ep.astensor(self.model.forward(x.tensor)) if targeted: c_minimize = best_other_classes(logits, target_classes) c_maximize = target_classes else: c_minimize = labels c_maximize = best_other_classes(logits, labels) is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize] assert is_adv_loss.shape == (N, ) is_adv_loss = is_adv_loss + confidence is_adv_loss = ep.maximum(0, is_adv_loss) is_adv_loss = is_adv_loss * consts squared_norms = flatten(x - reconstsructed_x).square().sum(axis=-1) loss = is_adv_loss.sum() + squared_norms.sum() return loss, (x, logits) loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True) consts = initial_const * np.ones((N, )) lower_bounds = np.zeros((N, )) upper_bounds = np.inf * np.ones((N, )) best_advs = ep.zeros_like(x) best_advs_norms = ep.ones(x, (N, )) * np.inf # the binary search searches for the smallest consts that produce adversarials for binary_search_step in range(binary_search_steps): if (binary_search_step == binary_search_steps - 1 and binary_search_steps >= 10): # in the last iteration, repeat the search once consts = np.minimum(upper_bounds, 1e10) # create a new optimizer find the delta that minimizes the loss delta = ep.zeros_like(x_attack) optimizer = AdamOptimizer(delta) found_advs = np.full( (N, ), fill_value=False) # found adv with the current consts loss_at_previous_check = np.inf consts_ = ep.from_numpy(x, consts.astype(np.float32)) for iteration in range(max_iterations): loss, (perturbed, logits), gradient = loss_aux_and_grad(delta, consts_) delta += optimizer(gradient, learning_rate) if abort_early and iteration % (np.ceil( max_iterations / 10)) == 0: # after each tenth of the iterations, check progress if not (loss <= 0.9999 * loss_at_previous_check): break # stop Adam if there has been no progress loss_at_previous_check = loss found_advs_iter = is_adv(logits) found_advs = np.logical_or(found_advs, found_advs_iter.numpy()) norms = flatten(perturbed - x).square().sum(axis=-1).sqrt() closer = norms < best_advs_norms new_best = closer.float32() * found_advs_iter.float32() best_advs = ( atleast_kd(new_best, best_advs.ndim) * perturbed + (1 - atleast_kd(new_best, best_advs.ndim)) * best_advs) best_advs_norms = new_best * norms + ( 1 - new_best) * best_advs_norms upper_bounds = np.where(found_advs, consts, upper_bounds) lower_bounds = np.where(found_advs, lower_bounds, consts) consts_exponential_search = consts * 10 consts_binary_search = (lower_bounds + upper_bounds) / 2 consts = np.where(np.isinf(upper_bounds), consts_exponential_search, 
consts_binary_search) return best_advs.tensor
def test_max_axes(dummy: Tensor) -> Tensor: t = ep.ones(dummy, 30).float32().reshape((3, 5, 2)) return ep.max(t, axis=(0, 1))
def run( self, model: Model, inputs: T, criterion: Union[Misclassification, TargetedMisclassification, T], *, starting_points: Optional[ep.Tensor] = None, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) criterion_ = get_criterion(criterion) if isinstance(criterion_, Misclassification): targeted = False classes = criterion_.labels elif isinstance(criterion_, TargetedMisclassification): targeted = True classes = criterion_.target_classes else: raise ValueError("unsupported criterion") def loss_fn( inputs: ep.Tensor, labels: ep.Tensor ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]: logits = model(inputs) if targeted: c_minimize = best_other_classes(logits, labels) c_maximize = labels # target_classes else: c_minimize = labels # labels c_maximize = best_other_classes(logits, labels) loss = logits[rows, c_minimize] - logits[rows, c_maximize] return -loss.sum(), (logits, loss) x, restore_type = ep.astensor_(inputs) del inputs, criterion, kwargs N = len(x) # start from initialization points/attack if starting_points is not None: x1 = starting_points else: if self.init_attack is not None: x1 = self.init_attack.run(model, x, criterion_) else: x1 = None # if initial points or initialization attacks are provided, # search for the boundary if x1 is not None: is_adv = get_is_adversarial(criterion_, model) assert is_adv(x1).all() lower_bound = ep.zeros(x, shape=(N, )) upper_bound = ep.ones(x, shape=(N, )) for _ in range(self.binary_search_steps): epsilons = (lower_bound + upper_bound) / 2 mid_points = self.mid_points(x, x1, epsilons, model.bounds) is_advs = is_adv(mid_points) lower_bound = ep.where(is_advs, lower_bound, epsilons) upper_bound = ep.where(is_advs, epsilons, upper_bound) starting_points = self.mid_points(x, x1, upper_bound, model.bounds) delta = starting_points - x else: # start from x0 delta = ep.zeros_like(x) if classes.shape != (N, ): name = "target_classes" if targeted else "labels" raise ValueError( f"expected {name} to have shape ({N},), got {classes.shape}") min_, max_ = model.bounds rows = range(N) grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True) if self.p != 0: epsilon = ep.inf * ep.ones(x, len(x)) else: epsilon = ep.ones(x, len(x)) if x1 is None \ else ep.norms.l0(flatten(delta), axis=-1) if self.p != 0: worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)), p=self.p, axis=-1) else: worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32() best_lp = worst_norm best_delta = delta adv_found = ep.zeros(x, len(x)).bool() for i in range(self.steps): # perform cosine annealing of learning rates stepsize = (self.min_stepsize + (self.max_stepsize - self.min_stepsize) * (1 + math.cos(math.pi * i / self.steps)) / 2) gamma = (0.001 + (self.gamma - 0.001) * (1 + math.cos(math.pi * (i / self.steps))) / 2) x_adv = x + delta loss, (logits, loss_batch), gradients = grad_and_logits(x_adv, classes) is_adversarial = criterion_(x_adv, logits) lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1) is_smaller = lp <= best_lp is_both = ep.logical_and(is_adversarial, is_smaller) adv_found = ep.logical_or(adv_found, is_adversarial) best_lp = ep.where(is_both, lp, best_lp) best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta) # update epsilon if self.p != 0: distance_to_boundary = abs(loss_batch) / ep.norms.lp( flatten(gradients), p=self.dual, axis=-1) epsilon = ep.where( is_adversarial, ep.minimum( epsilon * (1 - gamma), ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)), ep.where( adv_found, epsilon * (1 + gamma), 
ep.norms.lp(flatten(delta), p=self.p, axis=-1) + distance_to_boundary)) else: epsilon = ep.where( is_adversarial, ep.minimum( ep.minimum(epsilon - 1, (epsilon * (1 - gamma)).astype(int).astype( epsilon.dtype)), ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)), ep.maximum(epsilon + 1, (epsilon * (1 + gamma)).astype(int).astype( epsilon.dtype))) epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype) # clip epsilon epsilon = ep.minimum(epsilon, worst_norm) # computes normalized gradient update grad_ = self.normalize(gradients, x=x, bounds=model.bounds) * stepsize # do step delta = delta + grad_ # project according to the given norm delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x # clip to valid bounds delta = ep.clip(x + delta, *model.bounds) - x x_adv = x + best_delta return restore_type(x_adv)
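# The `self.dual` exponent used above is the Hoelder conjugate of `self.p`
# (1/p + 1/dual = 1): the first-order estimate of the distance to the decision
# boundary divides the loss by the gradient norm measured in the dual norm.
# A small NumPy illustration of that estimate (hypothetical names):
import numpy as np

def distance_to_boundary_estimate(loss_value, grad, p):
    """First-order estimate |loss| / ||grad||_q with 1/p + 1/q = 1."""
    if p == np.inf:
        q = 1.0
    elif p == 1:
        q = np.inf
    else:
        q = p / (p - 1.0)
    return abs(loss_value) / (np.linalg.norm(grad.ravel(), ord=q) + 1e-12)

# e.g. for an L2 attack (p=2) the dual norm is also the L2 norm:
grad = np.array([3.0, 4.0])
print(distance_to_boundary_estimate(1.0, grad, p=2))  # 1.0 / 5.0 = 0.2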
def __call__(self, inputs, labels, *, criterion, steps=1000): originals = ep.astensor(inputs) labels = ep.astensor(labels) def is_adversarial(p: ep.Tensor) -> ep.Tensor: """For each input in x, returns true if it is an adversarial for the given model and criterion""" logits = ep.astensor(self.model.forward(p.tensor)) return criterion(originals, labels, p, logits) x0 = ep.astensor(inputs) N = len(x0) shape = list(x0.shape) if self.channel_axis is not None: shape[self.channel_axis] = 1 min_, max_ = self.model.bounds() r = max_ - min_ result = x0 is_adv = is_adversarial(result) best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf)) min_probability = ep.zeros(x0, N) max_probability = ep.ones(x0, N) stepsizes = max_probability / steps p = stepsizes for step in range(steps): # add salt and pepper u = ep.uniform(x0, shape) p_ = atleast_kd(p, x0.ndim) salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r pepper = -(u < p_ / 2).astype(x0.dtype) * r x = x0 + salt + pepper x = ep.clip(x, min_, max_) # check if we found new best adversarials norms = flatten(x).square().sum(axis=-1).sqrt() closer = norms < best_advs_norms is_adv = is_adversarial(x) # TODO: ignore those that are not closer anyway is_best_adv = ep.logical_and(is_adv, closer) # update results and search space result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result) best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms) min_probability = ep.where(is_best_adv, 0.5 * p, min_probability) # we set max_probability a bit higher than p because the relationship # between p and norms is not strictly monotonic max_probability = ep.where( is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability ) remaining = steps - step stepsizes = ep.where( is_best_adv, (max_probability - min_probability) / remaining, stepsizes ) reset = p == max_probability p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p) p = ep.minimum(p + stepsizes, max_probability) return result.tensor
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, starting_points: Optional[T] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) originals, restore_type = ep.astensor_(inputs) del inputs, kwargs criterion = get_criterion(criterion) is_adversarial = get_is_adversarial(criterion, model) if starting_points is None: init_attack: MinimizationAttack if self.init_attack is None: init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50) logging.info( f"Neither starting_points nor init_attack given. Falling" f" back to {init_attack!r} for initialization.") else: init_attack = self.init_attack # TODO: use call and support all types of attacks (once early_stop is # possible in __call__) best_advs = init_attack.run(model, originals, criterion, early_stop=early_stop) else: best_advs = ep.astensor(starting_points) is_adv = is_adversarial(best_advs) if not is_adv.all(): failed = is_adv.logical_not().float32().sum() if starting_points is None: raise ValueError( f"init_attack failed for {failed} of {len(is_adv)} inputs") else: raise ValueError( f"{failed} of {len(is_adv)} starting_points are not adversarial" ) del starting_points tb = TensorBoard(logdir=self.tensorboard) N = len(originals) ndim = originals.ndim spherical_steps = ep.ones(originals, N) * self.spherical_step source_steps = ep.ones(originals, N) * self.source_step tb.scalar("batchsize", N, 0) # create two queues for each sample to track success rates # (used to update the hyper parameters) stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N) stats_step_adversarial = ArrayQueue(maxlen=30, N=N) bounds = model.bounds self.class_1 = [] self.class_2 = [] self.surrogate_model = None device = model.device train_step = 500 for step in tqdm(range(1, self.steps + 1)): converged = source_steps < self.source_step_convergance if converged.all(): break # pragma: no cover converged = atleast_kd(converged, ndim) # TODO: performance: ignore those that have converged # (we could select the non-converged ones, but we currently # cannot easily invert this in the end using EagerPy) unnormalized_source_directions = originals - best_advs source_norms = ep.norms.l2(flatten(unnormalized_source_directions), axis=-1) source_directions = unnormalized_source_directions / atleast_kd( source_norms, ndim) # only check spherical candidates every k steps check_spherical_and_update_stats = step % self.update_stats_every_k == 0 candidates, spherical_candidates = draw_proposals( bounds, originals, best_advs, unnormalized_source_directions, source_directions, source_norms, spherical_steps, source_steps, self.surrogate_model) candidates.dtype == originals.dtype spherical_candidates.dtype == spherical_candidates.dtype is_adv = is_adversarial(candidates) is_adv_spherical_candidates = is_adversarial(spherical_candidates) if is_adv.item(): self.class_1.append(candidates) if not is_adv_spherical_candidates.item(): self.class_2.append(spherical_candidates) if (step % train_step == 0) and (step > 0): start_time = time() class_1 = self.class_1 class_2 = self.class_2 class_1 = np.array([image.numpy()[0] for image in class_1]) class_2 = np.array([image.numpy()[0] for image in class_2]) class_2 = class_2[:len(class_1)] data = np.concatenate([class_1, class_2]) labels = np.append(np.ones(len(class_1)), np.zeros(len(class_2))) X = torch.tensor(data).to(device) y = torch.tensor(labels, dtype=torch.long).to(device) if self.surrogate_model is None: model_sur = torchvision.models.resnet18(pretrained=True) #model.features[0] 
= torch.nn.Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) model_sur.fc = torch.nn.Linear(in_features=512, out_features=2, bias=True) model_sur = model_sur.to(device) else: model_sur = model_surrogate X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42) optimizer = torch.optim.Adam(model_sur.parameters(), lr=3e-4) loss = torch.nn.CrossEntropyLoss() model_surrogate, accuracy_history_test, accuracy_history_train = train( model_sur, optimizer, loss, X_train, y_train, X_test, y_test) model_surrogate = model_surrogate.eval() self.surrogate_model = fb.PyTorchModel(model_surrogate, bounds=(0, 1), device=device) end_time = time() #print('Time for train: ', np.round(end_time - start_time, 2)) #print('\n') spherical_is_adv: Optional[ep.Tensor] if check_spherical_and_update_stats: spherical_is_adv = is_adversarial(spherical_candidates) stats_spherical_adversarial.append(spherical_is_adv) # TODO: algorithm: the original implementation ignores those samples # for which spherical is not adversarial and continues with the # next iteration -> we estimate different probabilities (conditional vs. unconditional) # TODO: thoughts: should we always track this because we compute it anyway stats_step_adversarial.append(is_adv) else: spherical_is_adv = None # in theory, we are closer per construction # but limited numerical precision might break this distances = ep.norms.l2(flatten(originals - candidates), axis=-1) closer = distances < source_norms is_best_adv = ep.logical_and(is_adv, closer) is_best_adv = atleast_kd(is_best_adv, ndim) cond = converged.logical_not().logical_and(is_best_adv) best_advs = ep.where(cond, candidates, best_advs) tb.probability("converged", converged, step) tb.scalar("updated_stats", check_spherical_and_update_stats, step) tb.histogram("norms", source_norms, step) tb.probability("is_adv", is_adv, step) if spherical_is_adv is not None: tb.probability("spherical_is_adv", spherical_is_adv, step) tb.histogram("candidates/distances", distances, step) tb.probability("candidates/closer", closer, step) tb.probability("candidates/is_best_adv", is_best_adv, step) tb.probability("new_best_adv_including_converged", is_best_adv, step) tb.probability("new_best_adv", cond, step) if check_spherical_and_update_stats: full = stats_spherical_adversarial.isfull() tb.probability("spherical_stats/full", full, step) if full.any(): probs = stats_spherical_adversarial.mean() cond1 = ep.logical_and(probs > 0.5, full) spherical_steps = ep.where( cond1, spherical_steps * self.step_adaptation, spherical_steps) source_steps = ep.where( cond1, source_steps * self.step_adaptation, source_steps) cond2 = ep.logical_and(probs < 0.2, full) spherical_steps = ep.where( cond2, spherical_steps / self.step_adaptation, spherical_steps) source_steps = ep.where( cond2, source_steps / self.step_adaptation, source_steps) stats_spherical_adversarial.clear( ep.logical_or(cond1, cond2)) tb.conditional_mean( "spherical_stats/isfull/success_rate/mean", probs, full, step) tb.probability_ratio("spherical_stats/isfull/too_linear", cond1, full, step) tb.probability_ratio( "spherical_stats/isfull/too_nonlinear", cond2, full, step) full = stats_step_adversarial.isfull() tb.probability("step_stats/full", full, step) if full.any(): probs = stats_step_adversarial.mean() # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess) # instead of p(source_step_success | spherical_step_sucess) that was tracked before cond1 = ep.logical_and(probs > 0.25, full) 
source_steps = ep.where( cond1, source_steps * self.step_adaptation, source_steps) cond2 = ep.logical_and(probs < 0.1, full) source_steps = ep.where( cond2, source_steps / self.step_adaptation, source_steps) stats_step_adversarial.clear(ep.logical_or(cond1, cond2)) tb.conditional_mean("step_stats/isfull/success_rate/mean", probs, full, step) tb.probability_ratio( "step_stats/isfull/success_rate_too_high", cond1, full, step) tb.probability_ratio( "step_stats/isfull/success_rate_too_low", cond2, full, step) tb.histogram("spherical_step", spherical_steps, step) tb.histogram("source_step", source_steps, step) tb.close() return restore_type(best_advs)
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, starting_points: Optional[T] = None, epsilons: float, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) originals, restore_type = ep.astensor_(inputs) del inputs, kwargs if self.eps_early_stop and len(epsilons)!=1: print('epsilon-based early stopping only possible for one epsilon value') assert not(self.eps_early_stop and len(epsilons)!=1) verify_input_bounds(originals, model) criterion = get_criterion(criterion) is_adversarial = get_is_adversarial(criterion, model) if starting_points is None: init_attack: MinimizationAttack if self.init_attack is None: init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50) logging.info( f"Neither starting_points nor init_attack given. Falling" f" back to {init_attack!r} for initialization." ) else: init_attack = self.init_attack # TODO: use call and support all types of attacks (once early_stop is # possible in __call__) best_advs = init_attack.run( model, originals, criterion, early_stop=early_stop ) else: best_advs = ep.astensor(starting_points) is_adv = is_adversarial(best_advs) if not is_adv.all(): failed = is_adv.logical_not().float32().sum() if starting_points is None: raise ValueError( f"init_attack failed for {failed} of {len(is_adv)} inputs" ) else: raise ValueError( f"{failed} of {len(is_adv)} starting_points are not adversarial" ) del starting_points tb = TensorBoard(logdir=self.tensorboard) N = len(originals) epsilon = ep.astensor(epsilons[0] * ep.ones(originals,(N,))) ndim = originals.ndim spherical_steps = ep.ones(originals, N) * self.spherical_step source_steps = ep.ones(originals, N) * self.source_step tb.scalar("batchsize", N, 0) # create two queues for each sample to track success rates # (used to update the hyper parameters) stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N) stats_step_adversarial = ArrayQueue(maxlen=30, N=N) bounds = model.bounds for step in range(1, self.steps + 1): converged = source_steps < self.source_step_convergance if converged.all(): break # pragma: no cover converged = atleast_kd(converged, ndim) # TODO: performance: ignore those that have converged # (we could select the non-converged ones, but we currently # cannot easily invert this in the end using EagerPy) unnormalized_source_directions = originals - best_advs source_norms = ep.norms.l2(flatten(unnormalized_source_directions), axis=-1) source_directions = unnormalized_source_directions / atleast_kd( source_norms, ndim ) # only check spherical candidates every k steps check_spherical_and_update_stats = step % self.update_stats_every_k == 0 candidates, spherical_candidates = draw_proposals( bounds, originals, best_advs, unnormalized_source_directions, source_directions, source_norms, spherical_steps, source_steps, ) candidates.dtype == originals.dtype spherical_candidates.dtype == spherical_candidates.dtype is_adv = is_adversarial(candidates) spherical_is_adv: Optional[ep.Tensor] if check_spherical_and_update_stats: spherical_is_adv = is_adversarial(spherical_candidates) stats_spherical_adversarial.append(spherical_is_adv) # TODO: algorithm: the original implementation ignores those samples # for which spherical is not adversarial and continues with the # next iteration -> we estimate different probabilities (conditional vs. 
unconditional) # TODO: thoughts: should we always track this because we compute it anyway stats_step_adversarial.append(is_adv) else: spherical_is_adv = None # in theory, we are closer per construction # but limited numerical precision might break this distances = ep.norms.l2(flatten(originals - candidates), axis=-1) closer = distances < source_norms is_best_adv = ep.logical_and(is_adv, closer) is_best_adv = atleast_kd(is_best_adv, ndim) cond = converged.logical_not().logical_and(is_best_adv) best_advs = ep.where(cond, candidates, best_advs) tb.probability("converged", converged, step) tb.scalar("updated_stats", check_spherical_and_update_stats, step) tb.histogram("norms", source_norms, step) tb.probability("is_adv", is_adv, step) if spherical_is_adv is not None: tb.probability("spherical_is_adv", spherical_is_adv, step) tb.histogram("candidates/distances", distances, step) tb.probability("candidates/closer", closer, step) tb.probability("candidates/is_best_adv", is_best_adv, step) tb.probability("new_best_adv_including_converged", is_best_adv, step) tb.probability("new_best_adv", cond, step) if check_spherical_and_update_stats: full = stats_spherical_adversarial.isfull() tb.probability("spherical_stats/full", full, step) if full.any(): probs = stats_spherical_adversarial.mean() cond1 = ep.logical_and(probs > 0.5, full) spherical_steps = ep.where( cond1, spherical_steps * self.step_adaptation, spherical_steps ) source_steps = ep.where( cond1, source_steps * self.step_adaptation, source_steps ) cond2 = ep.logical_and(probs < 0.2, full) spherical_steps = ep.where( cond2, spherical_steps / self.step_adaptation, spherical_steps ) source_steps = ep.where( cond2, source_steps / self.step_adaptation, source_steps ) stats_spherical_adversarial.clear(ep.logical_or(cond1, cond2)) tb.conditional_mean( "spherical_stats/isfull/success_rate/mean", probs, full, step ) tb.probability_ratio( "spherical_stats/isfull/too_linear", cond1, full, step ) tb.probability_ratio( "spherical_stats/isfull/too_nonlinear", cond2, full, step ) full = stats_step_adversarial.isfull() tb.probability("step_stats/full", full, step) if full.any(): probs = stats_step_adversarial.mean() # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess) # instead of p(source_step_success | spherical_step_sucess) that was tracked before cond1 = ep.logical_and(probs > 0.25, full) source_steps = ep.where( cond1, source_steps * self.step_adaptation, source_steps ) cond2 = ep.logical_and(probs < 0.1, full) source_steps = ep.where( cond2, source_steps / self.step_adaptation, source_steps ) stats_step_adversarial.clear(ep.logical_or(cond1, cond2)) tb.conditional_mean( "step_stats/isfull/success_rate/mean", probs, full, step ) tb.probability_ratio( "step_stats/isfull/success_rate_too_high", cond1, full, step ) tb.probability_ratio( "step_stats/isfull/success_rate_too_low", cond2, full, step ) tb.histogram("spherical_step", spherical_steps, step) tb.histogram("source_step", source_steps, step) best_advs_norms = flatten(originals - best_advs).norms.l2(axis=-1) if self.eps_early_stop and (ep.maximum(best_advs_norms,epsilon) == epsilon).all(): print('early stopped because epsilon condition satisfied') break tb.close() return restore_type(best_advs)
def run( self, model: Model, inputs: T, criterion: Union[Criterion, T], *, early_stop: Optional[float] = None, starting_points: Optional[T] = None, epsilons: float, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) originals, restore_type = ep.astensor_(inputs) del inputs, kwargs N = len(originals) if self.eps_early_stop and len(epsilons) != 1: print( 'epsilon-based early stopping only possible for one epsilon value' ) assert not (self.eps_early_stop and len(epsilons) != 1) epsilon = ep.astensor(epsilons[0] * ep.ones(originals, (N, ))) del epsilons verify_input_bounds(originals, model) criterion = get_criterion(criterion) is_adversarial = get_is_adversarial(criterion, model) if starting_points is None: init_attack: MinimizationAttack if self.init_attack is None: init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50) logging.info( f"Neither starting_points nor init_attack given. Falling" f" back to {init_attack!r} for initialization.") else: init_attack = self.init_attack # TODO: use call and support all types of attacks (once early_stop is # possible in __call__) x_advs = init_attack.run(model, originals, criterion, early_stop=early_stop) else: x_advs = ep.astensor(starting_points) is_adv = is_adversarial(x_advs) if not is_adv.all(): failed = is_adv.logical_not().float32().sum() if starting_points is None: raise ValueError( f"init_attack failed for {failed} of {len(is_adv)} inputs") else: raise ValueError( f"{failed} of {len(is_adv)} starting_points are not adversarial" ) del starting_points tb = TensorBoard(logdir=self.tensorboard) # Project the initialization to the boundary. x_advs = self._binary_search(is_adversarial, originals, x_advs) assert ep.all(is_adversarial(x_advs)) distances = self.distance(originals, x_advs) for step in range(self.steps): delta = self.select_delta(originals, distances, step) # Choose number of gradient estimation steps. num_gradient_estimation_steps = int( min([ self.initial_num_evals * math.sqrt(step + 1), self.max_num_evals ])) gradients = self.approximate_gradients( is_adversarial, x_advs, num_gradient_estimation_steps, delta) if self.constraint == "linf": update = ep.sign(gradients) else: update = gradients if self.stepsize_search == "geometric_progression": # find step size. epsilons = distances / math.sqrt(step + 1) while True: x_advs_proposals = ep.clip( x_advs + atleast_kd(epsilons, x_advs.ndim) * update, 0, 1) success = is_adversarial(x_advs_proposals) epsilons = ep.where(success, epsilons, epsilons / 2.0) if ep.all(success): break # Update the sample. x_advs = ep.clip( x_advs + atleast_kd(epsilons, update.ndim) * update, 0, 1) assert ep.all(is_adversarial(x_advs)) # Binary search to return to the boundary. x_advs = self._binary_search(is_adversarial, originals, x_advs) assert ep.all(is_adversarial(x_advs)) elif self.stepsize_search == "grid_search": # Grid search for stepsize. 
epsilons_grid = ep.expand_dims( ep.from_numpy( distances, np.logspace( -4, 0, num=20, endpoint=True, dtype=np.float32), ), 1, ) * ep.expand_dims(distances, 0) proposals_list = [] for epsilons in epsilons_grid: x_advs_proposals = ( x_advs + atleast_kd(epsilons, update.ndim) * update) x_advs_proposals = ep.clip(x_advs_proposals, 0, 1) mask = is_adversarial(x_advs_proposals) x_advs_proposals = self._binary_search( is_adversarial, originals, x_advs_proposals) # only use new values where initial guess was already adversarial x_advs_proposals = ep.where(atleast_kd(mask, x_advs.ndim), x_advs_proposals, x_advs) proposals_list.append(x_advs_proposals) proposals = ep.stack(proposals_list, 0) proposals_distances = self.distance( ep.expand_dims(originals, 0), proposals) minimal_idx = ep.argmin(proposals_distances, 0) x_advs = proposals[minimal_idx] distances = self.distance(originals, x_advs) # log stats tb.histogram("norms", distances, step) if self.eps_early_stop and (ep.maximum(distances, epsilon) == epsilon).all(): print('early stopped because epsilon condition satisfied') break return restore_type(x_advs)
def __call__(
    self,
    inputs,
    labels,
    *,
    target_classes=None,
    binary_search_steps=9,
    max_iterations=10000,
    confidence=0,
    initial_learning_rate=1e-2,
    regularization=1e-2,
    initial_const=1e-3,
    abort_early=True,
    decision_rule="EN",
):
    x_0 = ep.astensor(inputs)
    N = len(x_0)

    assert decision_rule in ("EN", "L1")

    targeted = target_classes is not None
    if targeted:
        labels = None
        target_classes = ep.astensor(target_classes)
        assert target_classes.shape == (N,)
        is_adv = partial(targeted_is_adv, target_classes=target_classes, confidence=confidence)
    else:
        labels = ep.astensor(labels)
        assert labels.shape == (N,)
        is_adv = partial(untargeted_is_adv, labels=labels, confidence=confidence)

    min_, max_ = self.model.bounds()

    rows = np.arange(N)

    def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> ep.Tensor:
        assert y_k.shape == x_0.shape
        assert consts.shape == (N,)

        logits = ep.astensor(self.model.forward(y_k.tensor))

        if targeted:
            c_minimize = best_other_classes(logits, target_classes)
            c_maximize = target_classes
        else:
            c_minimize = labels
            c_maximize = best_other_classes(logits, labels)

        is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
        assert is_adv_loss.shape == (N,)

        is_adv_loss = is_adv_loss + confidence
        is_adv_loss = ep.maximum(0, is_adv_loss)
        is_adv_loss = is_adv_loss * consts

        squared_norms = flatten(y_k - x_0).square().sum(axis=-1)
        loss = is_adv_loss.sum() + squared_norms.sum()
        return loss, (y_k, logits)

    loss_aux_and_grad = ep.value_and_grad_fn(x_0, loss_fun, has_aux=True)

    consts = initial_const * np.ones((N,))
    lower_bounds = np.zeros((N,))
    upper_bounds = np.inf * np.ones((N,))

    best_advs = ep.zeros_like(x_0)
    best_advs_norms = ep.ones(x_0, (N,)) * np.inf

    # the binary search searches for the smallest consts that produce adversarials
    for binary_search_step in range(binary_search_steps):
        if binary_search_step == binary_search_steps - 1 and binary_search_steps >= 10:
            # in the last iteration, repeat the search once
            consts = np.minimum(upper_bounds, 1e10)

        # create a new optimizer to find the delta that minimizes the loss
        # TODO: rewrite this once eagerpy supports .copy()
        x_k = x_0  # ep.zeros_like(x_0) + x_0
        y_k = x_0  # ep.zeros_like(x_0) + x_0

        found_advs = np.full((N,), fill_value=False)  # found adv with the current consts
        loss_at_previous_check = np.inf

        consts_ = ep.from_numpy(x_0, consts.astype(np.float32))

        for iteration in range(max_iterations):
            # square-root learning rate decay
            learning_rate = initial_learning_rate * (1.0 - iteration / max_iterations) ** 0.5

            loss, (x, logits), gradient = loss_aux_and_grad(x_k, consts_)

            x_k_old = x_k
            x_k = project_shrinkage_thresholding(
                y_k - learning_rate * gradient, x_0, regularization, min_, max_
            )
            y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old)

            if abort_early and iteration % (np.ceil(max_iterations / 10)) == 0:
                # after each tenth of the iterations, check progress
                if not (loss <= 0.9999 * loss_at_previous_check):
                    break  # stop Adam if there has been no progress
                loss_at_previous_check = loss

            found_advs_iter = is_adv(logits)

            best_advs, best_advs_norms = apply_decision_rule(
                decision_rule,
                regularization,
                best_advs,
                best_advs_norms,
                x_k,
                x_0,
                found_advs_iter,
            )

            found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

        upper_bounds = np.where(found_advs, consts, upper_bounds)
        lower_bounds = np.where(found_advs, lower_bounds, consts)

        consts_exponential_search = consts * 10
        consts_binary_search = (lower_bounds + upper_bounds) / 2
        consts = np.where(np.isinf(upper_bounds), consts_exponential_search, consts_binary_search)

    return best_advs.tensor
def run( self, model: Model, inputs: T, criterion: Misclassification, *, early_stop: Optional[float] = None, **kwargs: Any, ) -> T: raise_if_kwargs(kwargs) x0, restore_type = ep.astensor_(inputs) criterion_ = get_criterion(criterion) del inputs, criterion, kwargs is_adversarial = get_is_adversarial(criterion_, model) N = len(x0) shape = list(x0.shape) if self.across_channels and x0.ndim > 2: if self.channel_axis is None: channel_axis = get_channel_axis(model, x0.ndim) else: channel_axis = self.channel_axis % x0.ndim if channel_axis is not None: shape[channel_axis] = 1 min_, max_ = model.bounds r = max_ - min_ result = x0 is_adv = is_adversarial(result) best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf)) min_probability = ep.zeros(x0, N) max_probability = ep.ones(x0, N) stepsizes = max_probability / self.steps p = stepsizes for step in range(self.steps): # add salt and pepper u = ep.uniform(x0, tuple(shape)) p_ = atleast_kd(p, x0.ndim) salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r pepper = -(u < p_ / 2).astype(x0.dtype) * r x = x0 + salt + pepper x = ep.clip(x, min_, max_) # check if we found new best adversarials norms = flatten(x).norms.l2(axis=-1) closer = norms < best_advs_norms is_adv = is_adversarial( x) # TODO: ignore those that are not closer anyway is_best_adv = ep.logical_and(is_adv, closer) # update results and search space result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result) best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms) min_probability = ep.where(is_best_adv, 0.5 * p, min_probability) # we set max_probability a bit higher than p because the relationship # between p and norms is not strictly monotonic max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability) remaining = self.steps - step stepsizes = ep.where( is_best_adv, (max_probability - min_probability) / remaining, stepsizes) reset = p == max_probability p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p) p = ep.minimum(p + stepsizes, max_probability) return restore_type(result)
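# A minimal, self-contained usage sketch for the salt-and-pepper noise attack
# above, with a tiny random model and random data (keyword names assumed to
# match recent foolbox releases):
import torch
import foolbox as fb

net = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10)).eval()
fmodel = fb.PyTorchModel(net, bounds=(0, 1))
images = torch.rand(8, 3, 32, 32)
labels = fmodel(images).argmax(-1)

attack = fb.attacks.SaltAndPepperNoiseAttack(steps=200, across_channels=True)
advs = attack.run(fmodel, images, fb.criteria.Misclassification(labels))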