Exemplo n.º 1
0
    def approximate_gradients(
        self,
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        x_advs: ep.Tensor,
        steps: int,
        delta: ep.Tensor,
    ) -> ep.Tensor:
        """Estimate a gradient direction from hard-label decisions.

        For each of the ``steps`` draws, a random direction (Gaussian for the
        "l2" constraint, uniform in [-1, 1] for "linf") is scaled by ``delta``,
        added to ``x_advs`` and clipped to [0, 1]. ``is_adversarial`` is then
        queried once per draw and the resulting +1/-1 answers are used as
        weights when averaging the directions.

        Returns the averaged direction divided by its L2 norm as computed by
        ``ep.norms.l2`` (plus a small constant to avoid division by zero).
        """
        # (steps, bs, ...)
        sample_shape = (steps, *x_advs.shape)
        if self.constraint == "l2":
            rv = ep.normal(x_advs, sample_shape)
        elif self.constraint == "linf":
            rv = ep.uniform(x_advs, low=-1, high=1, shape=sample_shape)
        # rescale every drawn direction by its own L2 norm
        rv_norms = ep.norms.l2(flatten(rv, keep=1), -1)
        rv /= atleast_kd(rv_norms, rv.ndim) + 1e-12

        step_sizes = ep.expand_dims(delta, 0)
        scaled_rv = atleast_kd(step_sizes, rv.ndim) * rv

        # walk along the scaled directions but stay inside [0, 1]
        perturbed = ep.clip(ep.expand_dims(x_advs, 0) + scaled_rv, 0, 1)

        # clipping may have shortened the step, so recover the effective direction
        rv = (perturbed - x_advs) / atleast_kd(
            ep.expand_dims(delta + 1e-8, 0), rv.ndim)

        signs: List[ep.Tensor] = []
        for idx in range(steps):
            flags = is_adversarial(perturbed[idx])
            plus_one = ep.ones(x_advs, (len(x_advs)))
            minus_one = -ep.ones(x_advs, (len(flags)))
            signs.append(ep.where(flags, plus_one, minus_one))
        # (steps, bs, ...)
        multipliers = ep.stack(signs, 0)

        # subtract the per-sample mean answer unless all answers agree
        baseline = ep.mean(multipliers, axis=0, keepdims=True)
        vals = ep.where(
            ep.abs(baseline) == 1,
            multipliers,
            multipliers - baseline,
        )
        grad = ep.mean(atleast_kd(vals, rv.ndim) * rv, axis=0)

        grad /= ep.norms.l2(atleast_kd(flatten(grad), grad.ndim)) + 1e-12

        return grad
Exemplo n.º 2
0
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
        """Linearly scan blend weights between the inputs and a constant target.

        The target is the value at relative position ``self.target`` inside the
        model bounds. Weights 0, 1/steps, 2/steps, ... are tried in order and,
        for every sample, the first weight that yields an adversarial input is
        kept. Samples for which no weight works are blended with weight 1.
        """
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        lower, upper = model.bounds
        direction = (lower + self.target * (upper - lower)) - x

        # per-sample blend weight; 1 doubles as the "nothing found yet" marker
        found = ep.ones(x, len(x))

        weight = 0.0
        increment = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            hits = is_adversarial(x + weight * direction)
            first_hits = ep.logical_and(hits, found == 1)
            found = ep.where(first_hits, weight, found)

            if (found < 1).all():
                break

            weight += increment

        return restore_type(x + atleast_kd(found, x.ndim) * direction)
Exemplo n.º 3
0
    def transform_bounds(self: ModelType,
                         bounds: BoundsInput,
                         inplace: bool = False) -> ModelType:
        """Returns a model with the desired bounds and matching preprocessing.

        The mean and std stored in ``_preprocess_args`` are rescaled so that
        inputs given in the new bounds are preprocessed like the corresponding
        inputs in the old bounds. With ``inplace=True`` this model is modified
        and returned; otherwise a shallow copy is modified and returned.
        """
        # more efficient than the base class implementation because it avoids
        # the additional wrapper
        if self.bounds == bounds:
            return self if inplace else copy.copy(self)

        old_min, old_max = self.bounds
        new_min, new_max = bounds
        scale = (new_max - new_min) / (old_max - old_min)

        mean, std, flip_axis = self._preprocess_args

        # missing statistics behave like mean 0 / std 1
        if mean is None:
            mean = ep.zeros(self._dummy, 1)
        if std is None:
            std = ep.ones(self._dummy, 1)

        mean = scale * (mean - old_min) + new_min
        std = scale * std

        model = self if inplace else copy.copy(self)
        model._bounds = Bounds(*bounds)
        model._preprocess_args = (mean, std, flip_axis)
        return model
Exemplo n.º 4
0
    def __call__(self, model: Model, inputs: T, criterion: Union[Criterion, T]) -> T:
        """Binary-search the blend weight between the inputs and a constant target.

        The target is the value at relative position ``self.target`` inside the
        model bounds. After ``self.binary_search_steps`` bisection steps the
        inputs blended with the per-sample upper bound of the search interval
        are returned.
        """
        x, restore_type = ep.astensor_(inputs)
        del inputs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        low, high = model.bounds
        direction = (low + self.target * (high - low)) - x

        def blend(weights: ep.Tensor) -> ep.Tensor:
            # broadcast the per-sample weights over the remaining input dims
            return x + atleast_kd(weights, x.ndim) * direction

        failing = ep.zeros(x, len(x))
        succeeding = ep.ones(x, len(x))
        probe = failing
        for _ in range(self.binary_search_steps):
            is_adv = is_adversarial(blend(probe))
            failing = ep.where(is_adv, failing, probe)
            succeeding = ep.where(is_adv, probe, succeeding)
            probe = (failing + succeeding) / 2

        return restore_type(blend(succeeding))
Exemplo n.º 5
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[ep.Tensor] = None,
        **kwargs: Any,
    ) -> T:
        """Run the attack and return the best adversarials found.

        Parameters
        ----------
        model
            Model under attack.
        inputs
            Batch of original inputs.
        criterion
            Criterion (or labels) passed to ``get_criterion``.
        early_stop
            Only forwarded to the initialization attack.
        starting_points
            Adversarial starting points. If ``None``, they are produced by
            ``LinearSearchBlendedUniformNoiseAttack(steps=50)``.
        **kwargs
            If it contains ``"basis_params"``, that mapping is forwarded as
            keyword arguments to ``Basis``.

        Raises
        ------
        AssertionError
            If the starting points are not all adversarial.
        """
        originals, restore_type = ep.astensor_(inputs)

        self._nqueries = {i: 0 for i in range(len(originals))}
        self._set_cos_sin_function(originals)
        self.theta_max = ep.ones(originals, len(originals)) * self._theta_max
        criterion = get_criterion(criterion)
        self._criterion_is_adversarial = get_is_adversarial(criterion, model)

        # Get Starting Point
        # (the two cases are exhaustive; the former third branch raising a
        # ValueError could never be reached and has been removed)
        if starting_points is not None:
            best_advs = starting_points
        else:
            init_attack: MinimizationAttack = LinearSearchBlendedUniformNoiseAttack(steps=50)
            best_advs = init_attack.run(model, originals, criterion, early_stop=early_stop)

        assert self._is_adversarial(best_advs).all()

        # Initialize the direction orthogonalized with the first direction
        fd = best_advs - originals
        norm = ep.norms.l2(fd.flatten(1), axis=1)
        fd = fd / atleast_kd(norm, fd.ndim)
        self._directions_ortho = {i: v.expand_dims(0) for i, v in enumerate(fd)}

        # Load Basis
        if "basis_params" in kwargs:
            self._basis = Basis(originals, **kwargs["basis_params"])
        else:
            self._basis = Basis(originals)

        for _ in range(self._steps):
            # Get candidates. Shape: (n_candidates, batch_size, image_size)
            candidates = self._get_candidates(originals, best_advs)
            # swap the first two axes for any rank, so the batch comes first
            # (equals (1, 0, 2, 3, 4) for the 5-D case handled before)
            candidates = candidates.transpose((1, 0, *range(2, candidates.ndim)))

            # for every sample pick the candidate selected by argmax over
            # self.distance(original, candidate)
            best_candidates = ep.zeros_like(best_advs).raw
            for i, o in enumerate(originals):
                o_repeated = ep.concatenate([o.expand_dims(0)] * len(candidates[i]), axis=0)
                index = ep.argmax(self.distance(o_repeated, candidates[i])).raw
                best_candidates[i] = candidates[i][index].raw

            # only keep a candidate if it is closer to the original than the
            # current best adversarial
            is_success = self.distance(best_candidates, originals) < self.distance(best_advs, originals)
            best_advs = ep.where(atleast_kd(is_success, best_candidates.ndim), ep.astensor(best_candidates), best_advs)

            if all(v > self._max_queries for v in self._nqueries.values()):
                print("Max queries attained for all the images.")
                break
        return restore_type(best_advs)
Exemplo n.º 6
0
    def _binary_search(self, originals: ep.Tensor, perturbed: ep.Tensor, boost: Optional[bool] = False) -> ep.Tensor:
        """Bisect the projection parameter between ``originals`` and ``perturbed``.

        The search runs on a per-sample interval [0, 1] and stops when every
        interval is narrower than the threshold, after ``_BS_max_iteration``
        iterations, or when the midpoints no longer change. With ``boost`` the
        end points are first tightened using a single 10%/90% blend.
        Returns ``self._project(originals, perturbed, upper_bounds)``.
        """
        # Choose upper thresholds in binary search based on constraint.
        upper = ep.ones(perturbed, len(perturbed))
        lower = ep.zeros_like(upper)
        dim = np.prod(perturbed.shape[1:])
        tolerance = self._BS_gamma / (dim * math.sqrt(dim))

        # Boost Binary search
        if boost:
            blended = 0.1 * originals + 0.9 * perturbed
            blended_is_adv = atleast_kd(self._is_adversarial(blended), originals.ndim)
            # an adversarial blend replaces the perturbed end point,
            # a non-adversarial blend replaces the original end point
            originals = ep.where(blended_is_adv, originals, blended)
            perturbed = ep.where(blended_is_adv, blended, perturbed)

        # use this variable to check when mids stays constant and the BS has converged
        previous_mids = upper
        n_iterations = 0
        while ep.any(upper - lower > tolerance) and n_iterations < self._BS_max_iteration:
            n_iterations += 1
            mids = (lower + upper) / 2
            mid_is_adv = self._is_adversarial(self._project(originals, perturbed, mids))

            upper = ep.where(mid_is_adv, mids, upper)
            lower = ep.where(mid_is_adv, lower, mids)

            # check if there is no more progress due to numerical imprecision
            stalled = (previous_mids == mids).all()
            previous_mids = mids
            if stalled:
                break

        return self._project(originals, perturbed, upper)
Exemplo n.º 7
0
def test_flatten(dummy: Tensor) -> None:
    """Check the shapes returned by ``ep.flatten`` for several start/end values."""
    t = ep.ones(dummy, (16, 3, 32, 32))
    cases = [
        ({}, (16 * 3 * 32 * 32, )),
        ({"start": 1}, (16, 3 * 32 * 32)),
        ({"start": 2}, (16, 3, 32 * 32)),
        ({"start": 3}, (16, 3, 32, 32)),
        ({"end": -2}, (16 * 3 * 32, 32)),
        ({"end": -3}, (16 * 3, 32, 32)),
        ({"end": -4}, (16, 3, 32, 32)),
        ({"start": 1, "end": -2}, (16, 3 * 32, 32)),
    ]
    for flatten_kwargs, expected_shape in cases:
        assert ep.flatten(t, **flatten_kwargs).shape == expected_shape
Exemplo n.º 8
0
def test_tensorboard(logdir: Union[Literal[False], None, str], tmp_path: Any,
                     dummy: ep.Tensor) -> None:
    """Exercise every TensorBoard helper once and check that files get written.

    ``logdir == "temp"`` is replaced by ``tmp_path``; the file-count check only
    runs when the resulting ``logdir`` is truthy.
    """
    if logdir == "temp":
        logdir = tmp_path

    if logdir:
        n_before = len(list(tmp_path.iterdir()))

    def ones() -> ep.Tensor:
        return ep.ones(dummy, 10)

    def index() -> ep.Tensor:
        return ep.arange(dummy, 10)

    tb = fbn.tensorboard.TensorBoard(logdir)

    tb.scalar("a_scalar", 5, step=1)

    tb.mean("a_mean", ones(), step=2)

    tb.probability("a_probability", ones() == index(), step=2)

    # condition that is true for some of the entries
    values = index().float32()
    is_odd = ones() == (index() % 2)
    tb.conditional_mean("a_conditional_mean", values, is_odd, step=2)

    # condition that is false everywhere
    values = index().float32()
    never = ones() == ep.zeros(dummy, 10)
    tb.conditional_mean("a_conditional_mean_false", values, never, step=2)

    numerator = ones() == index()
    denominator = ones() == (index() % 2)
    tb.probability_ratio("a_probability_ratio", numerator, denominator, step=5)

    # denominator condition that is false everywhere
    numerator = ones() == (index() % 2)
    denominator = ones() == ep.zeros(dummy, 10)
    tb.probability_ratio("a_probability_ratio_y_zero", numerator, denominator, step=5)

    values = index().float32()
    tb.histogram("a_histogram", values, step=9, first=False)
    tb.histogram("a_histogram", values, step=10, first=True)

    tb.close()

    if logdir:
        n_after = len(list(tmp_path.iterdir()))
        assert n_after > n_before  # make sure something has been written
Exemplo n.º 9
0
    def _binary_search(
        self,
        is_adversarial: Callable[[ep.Tensor], ep.Tensor],
        originals: ep.Tensor,
        perturbed: ep.Tensor,
    ) -> ep.Tensor:
        """Bisect the projection parameter between ``originals`` and ``perturbed``.

        The per-sample search interval starts at [0, linf distance] for the
        "linf" constraint and at [0, 1] otherwise. The loop ends when every
        interval is narrower than its threshold or the midpoints stop changing.
        Returns ``self._project(originals, perturbed, upper_bounds)``.
        """
        # Choose upper thresholds in binary search based on constraint.
        dim = np.prod(perturbed.shape[1:])
        if self.constraint == "linf":
            upper = linf(originals, perturbed)

            # TODO: Check if the threshold is correct
            #  empirically this seems to be too low
            tolerance = upper * self.gamma / (dim * dim)
        else:
            upper = ep.ones(perturbed, len(perturbed))
            tolerance = self.gamma / (dim * math.sqrt(dim))

        lower = ep.zeros_like(upper)

        # use this variable to check when mids stays constant and the BS has converged
        previous_mids = upper

        while ep.any(upper - lower > tolerance):
            mids = (lower + upper) / 2
            mid_is_adv = is_adversarial(self._project(originals, perturbed, mids))

            upper = ep.where(mid_is_adv, mids, upper)
            lower = ep.where(mid_is_adv, lower, mids)

            # check if there is no more progress due to numerical imprecision
            stalled = (previous_mids == mids).all()
            previous_mids = mids

            if stalled:
                # TODO: warn user
                break

        return self._project(originals, perturbed, upper)
Exemplo n.º 10
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Linearly scan blend weights between the inputs and a constant target.

        The target is the value at relative position ``self.target`` inside the
        model bounds. Weights 0, 1/steps, 2/steps, ... are tried in order and,
        per sample, the first weight producing an adversarial input is kept;
        samples without such a weight are blended with weight 1.
        ``early_stop`` is not used by this method.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        lower, upper = model.bounds
        direction = (lower + self.target * (upper - lower)) - x

        # per-sample blend weight; 1 doubles as the "nothing found yet" marker
        found = ep.ones(x, len(x))

        weight = 0.0
        increment = 1.0 / self.steps
        for _ in range(self.steps):
            # TODO: reduce the batch size to the ones that have not yet been successful

            hits = is_adversarial(x + weight * direction)
            first_hits = ep.logical_and(hits, found == 1)
            found = ep.where(first_hits, weight, found)

            if (found < 1).all():
                break  # pragma: no cover

            weight += increment

        return restore_type(x + atleast_kd(found, x.ndim) * direction)
Exemplo n.º 11
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Binary-search the blend weight between the inputs and a constant target.

        The target is the value at relative position ``self.target`` inside the
        model bounds. After ``self.binary_search_steps`` bisection steps the
        inputs blended with the per-sample upper bound of the search interval
        are returned. ``early_stop`` is not used by this method.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        low, high = model.bounds
        direction = (low + self.target * (high - low)) - x

        failing = ep.zeros(x, len(x))
        succeeding = ep.ones(x, len(x))
        probe = failing
        for _ in range(self.binary_search_steps):
            candidate = x + atleast_kd(probe, x.ndim) * direction
            is_adv = is_adversarial(candidate)
            # shrink the interval from whichever side the probe belongs to
            failing = ep.where(is_adv, failing, probe)
            succeeding = ep.where(is_adv, probe, succeeding)
            probe = (failing + succeeding) / 2

        final_weights = atleast_kd(succeeding, x.ndim)
        return restore_type(x + final_weights * direction)
Exemplo n.º 12
0
def test_index_update_indices_scalar(dummy: Tensor) -> Tensor:
    """Set one entry per column, addressed by an index tensor, to a scalar."""
    x = ep.ones(dummy, (3, 4))
    rows = ep.from_numpy(dummy, np.array([0, 1, 2, 1]))
    cols = ep.arange(x, 4)
    return ep.index_update(x, ep.index[rows, cols], 33.0)
Exemplo n.º 13
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        starting_points=None,
        init_attack=None,
        criterion: Callable = misclassification,
        steps=25000,
        spherical_step=1e-2,
        source_step=1e-2,
        source_step_convergance=1e-7,
        step_adaptation=1.5,
        tensorboard=False,
        update_stats_every_k=10,
    ):
        """Boundary Attack

        Differences to the original reference implementation:
        * We do not perform internal operations with float64
        * The samples within a batch can currently influence each other a bit
        * We don't perform the additional convergence confirmation
        * The success rate tracking changed a bit
        * Some other changes due to batching and merged loops

        Parameters
        ----------
        inputs
            The original (unperturbed) inputs.
        labels
            The labels passed to the criterion.
        starting_points
            Adversarial inputs to start from. If None, they are created by
            calling ``init_attack(self.model)(inputs, labels)``.
        init_attack
            Attack class used to create the starting points; defaults to
            LinearSearchBlendedUniformNoiseAttack.
        criterion : Callable
            A callable that returns true if the given logits of perturbed
            inputs should be considered adversarial w.r.t. to the given labels
            and unperturbed inputs.
        steps : int
            Maximum number of iterations.
        spherical_step : float
            Initial per-sample spherical step size.
        source_step : float
            Initial per-sample step size towards the original input.
        source_step_convergance : float
            A sample counts as converged once its source step size drops
            below this value.
        step_adaptation : float
            Factor by which step sizes are multiplied or divided when the
            tracked success rates are too high or too low.
        tensorboard : str or False or None
            The log directory for TensorBoard summaries. If False, TensorBoard
            summaries will be disabled (default). If None, the logdir will be
            runs/CURRENT_DATETIME_HOSTNAME.
        update_stats_every_k : int
            The spherical candidates are checked and the statistics updated
            only every k-th step.

        Raises
        ------
        AssertionError
            If the starting points are not all adversarial or the proposals
            do not have the dtype of the original inputs.
        """
        tb = TensorBoard(logdir=tensorboard)

        originals = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in x, returns true if it is an adversarial for
            the given model and criterion"""
            logits = self.model.forward(p)
            return criterion(originals, labels, p, logits)

        if starting_points is None:
            if init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack.__name__} for initialization.")
            starting_points = init_attack(self.model)(inputs, labels)

        best_advs = ep.astensor(starting_points)
        assert is_adversarial(best_advs).all()

        N = len(originals)
        ndim = originals.ndim
        spherical_steps = ep.ones(originals, N) * spherical_step
        source_steps = ep.ones(originals, N) * source_step

        tb.scalar("batchsize", N, 0)

        # create two queues for each sample to track success rates
        # (used to update the hyper parameters)
        stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
        stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

        bounds = self.model.bounds()

        for step in range(1, steps + 1):
            converged = source_steps < source_step_convergance
            if converged.all():
                break
            converged = atleast_kd(converged, ndim)

            # TODO: performance: ignore those that have converged
            # (we could select the non-converged ones, but we currently
            # cannot easily invert this in the end using EagerPy)

            unnormalized_source_directions = originals - best_advs
            source_norms = l2norms(unnormalized_source_directions)
            source_directions = unnormalized_source_directions / atleast_kd(
                source_norms, ndim)

            # only check spherical candidates every k steps
            check_spherical_and_update_stats = step % update_stats_every_k == 0

            candidates, spherical_candidates = draw_proposals(
                bounds,
                originals,
                best_advs,
                unnormalized_source_directions,
                source_directions,
                source_norms,
                spherical_steps,
                source_steps,
            )
            # sanity checks: these used to be bare comparisons without any
            # effect (and the second one compared a dtype with itself)
            assert candidates.dtype == originals.dtype
            assert spherical_candidates.dtype == originals.dtype

            is_adv = is_adversarial(candidates)

            if check_spherical_and_update_stats:
                spherical_is_adv = is_adversarial(spherical_candidates)
                stats_spherical_adversarial.append(spherical_is_adv)
                # TODO: algorithm: the original implementation ignores those samples
                # for which spherical is not adversarial and continues with the
                # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                # TODO: thoughts: should we always track this because we compute it anyway
                stats_step_adversarial.append(is_adv)
            else:
                spherical_is_adv = None

            # in theory, we are closer per construction
            # but limited numerical precision might break this
            distances = l2norms(originals - candidates)
            closer = distances < source_norms
            is_best_adv = ep.logical_and(is_adv, closer)
            is_best_adv = atleast_kd(is_best_adv, ndim)

            # converged samples keep their current best adversarial
            cond = converged.logical_not().logical_and(is_best_adv)
            best_advs = ep.where(cond, candidates, best_advs)

            tb.probability("converged", converged, step)
            tb.scalar("updated_stats", check_spherical_and_update_stats, step)
            tb.histogram("norms", source_norms, step)
            tb.probability("is_adv", is_adv, step)
            if spherical_is_adv is not None:
                tb.probability("spherical_is_adv", spherical_is_adv, step)
            tb.histogram("candidates/distances", distances, step)
            tb.probability("candidates/closer", closer, step)
            tb.probability("candidates/is_best_adv", is_best_adv, step)
            tb.probability("new_best_adv_including_converged", is_best_adv,
                           step)
            tb.probability("new_best_adv", cond, step)

            if check_spherical_and_update_stats:
                # adapt both step sizes based on the spherical success rate
                full = stats_spherical_adversarial.isfull()
                tb.probability("spherical_stats/full", full, step)
                if full.any():
                    probs = stats_spherical_adversarial.mean()
                    cond1 = ep.logical_and(probs > 0.5, full)
                    spherical_steps = ep.where(
                        cond1, spherical_steps * step_adaptation,
                        spherical_steps)
                    source_steps = ep.where(cond1,
                                            source_steps * step_adaptation,
                                            source_steps)
                    cond2 = ep.logical_and(probs < 0.2, full)
                    spherical_steps = ep.where(
                        cond2, spherical_steps / step_adaptation,
                        spherical_steps)
                    source_steps = ep.where(cond2,
                                            source_steps / step_adaptation,
                                            source_steps)
                    stats_spherical_adversarial.clear(
                        ep.logical_or(cond1, cond2))
                    tb.conditional_mean(
                        "spherical_stats/isfull/success_rate/mean", probs,
                        full, step)
                    tb.probability_ratio("spherical_stats/isfull/too_linear",
                                         cond1, full, step)
                    tb.probability_ratio(
                        "spherical_stats/isfull/too_nonlinear", cond2, full,
                        step)

                # adapt the source step size based on the step success rate
                full = stats_step_adversarial.isfull()
                tb.probability("step_stats/full", full, step)
                if full.any():
                    probs = stats_step_adversarial.mean()
                    # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess)
                    # instead of p(source_step_success | spherical_step_sucess) that was tracked before
                    cond1 = ep.logical_and(probs > 0.25, full)
                    source_steps = ep.where(cond1,
                                            source_steps * step_adaptation,
                                            source_steps)
                    cond2 = ep.logical_and(probs < 0.1, full)
                    source_steps = ep.where(cond2,
                                            source_steps / step_adaptation,
                                            source_steps)
                    stats_step_adversarial.clear(ep.logical_or(cond1, cond2))
                    tb.conditional_mean("step_stats/isfull/success_rate/mean",
                                        probs, full, step)
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_high", cond1, full,
                        step)
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_low", cond2, full,
                        step)

            tb.histogram("spherical_step", spherical_steps, step)
            tb.histogram("source_step", source_steps, step)
        tb.close()
        return best_advs.tensor
Exemplo n.º 14
0
def test_index_update_row(dummy: Tensor) -> Tensor:
    """Replace row 1 of a 3x4 tensor of ones with a row filled with 66."""
    base = ep.ones(dummy, (3, 4))
    new_row = ep.ones(base, 4) * 66.0
    return ep.index_update(base, ep.index[1], new_row)
Exemplo n.º 15
0
def test_index_update_column_scalar(dummy: Tensor) -> Tensor:
    """Set column 1 of a 3x4 tensor of ones to the scalar 66."""
    base = ep.ones(dummy, (3, 4))
    column = ep.index[:, 1]
    return ep.index_update(base, column, 66.0)
Exemplo n.º 16
0
def test_ones_scalar(t: Tensor) -> Tensor:
    """Create a tensor of ones from an integer shape."""
    length = 5
    return ep.ones(t, length)
Exemplo n.º 17
0
def test_ones_tuple(t: Tensor) -> Tensor:
    """Create a tensor of ones from a tuple shape."""
    shape = (2, 3)
    return ep.ones(t, shape)
Exemplo n.º 18
0
    def __call__(
        self,
        model: Model,
        inputs,
        labels,
        *,
        criterion=misclassification,
        channel_axis: Optional[int] = None,
    ):
        """Search for adversarials by adding salt and pepper noise.

        The noise probability ``p`` is increased step by step; whenever a
        closer adversarial is found, the search range for ``p`` is narrowed
        around the current value.

        Parameters
        ----------
        model
            The model under attack.
        inputs
            The original inputs.
        labels
            The labels passed to ``get_is_adversarial``.
        criterion
            The criterion passed to ``get_is_adversarial``.
        channel_axis
            The axis across which the noise should be the same (if across_channels is True).
            If None, will be automatically inferred from the model if possible.

        Raises
        ------
        ValueError
            If the channel axis is needed but can neither be inferred from
            ``model.data_format`` nor is a valid axis index.
        """
        inputs, labels, restore = wrap(inputs, labels)
        is_adversarial = get_is_adversarial(criterion, inputs, labels, model)

        x0 = inputs
        N = len(x0)
        shape = list(x0.shape)
        if self.across_channels and x0.ndim > 2:
            if channel_axis is None and not hasattr(model, "data_format"):
                raise ValueError(
                    "cannot infer the data_format from the model, please specify"
                    " channel_axis when calling the attack")
            elif channel_axis is None:
                data_format = model.data_format  # type: ignore
                if (data_format is None or data_format != "channels_first"
                        and data_format != "channels_last"):
                    raise ValueError(
                        f"expected data_format to be 'channels_first' or 'channels_last'"
                    )
                channel_axis = 1 if data_format == "channels_first" else x0.ndim - 1
            elif not 0 <= channel_axis < x0.ndim:
                raise ValueError(
                    f"expected channel_axis to be in [0, {x0.ndim})")

            # draw a single noise value per position and broadcast it over
            # the channel axis
            shape[channel_axis] = 1

        min_, max_ = model.bounds()
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # inputs that are already adversarial have a perturbation norm of 0
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N),
                                   ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / self.steps
        p = stepsizes

        for step in range(self.steps):
            # add salt and pepper
            u = ep.uniform(x0, shape)
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials
            # (the norm must be the one of the perturbation x - x0, not the
            # one of the perturbed input itself, to be comparable with
            # best_advs_norms)
            norms = flatten(x - x0).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            is_adv = is_adversarial(
                x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0),
                                       max_probability)
            remaining = self.steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining,
                stepsizes)
            # restart from the lower end once the upper end has been reached
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return restore(result)
Exemplo n.º 19
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, Any] = None,
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Blend the inputs with uniform noise until they become adversarial.

        Up to ``self.directions`` uniform noise batches are drawn to find, per
        sample, a noise input that is adversarial (a warning is issued if this
        fails for some samples). Then blend weights from 0 to 1 in
        ``self.steps`` equal steps are tried and the smallest adversarial
        weight per sample is used for the returned blend.
        ``early_stop`` is not used by this method.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        is_adversarial = get_is_adversarial(criterion_, model)

        min_, max_ = model.bounds

        N = len(x)

        for attempt in range(self.directions):
            # random noise inputs tend to be classified into the same class,
            # so we might need to make very many draws if the original class
            # is that one
            draw = ep.uniform(x, x.shape, min_, max_)
            draw_is_adv = atleast_kd(is_adversarial(draw), x.ndim)

            if attempt > 0:
                # keep earlier adversarial draws, fill the rest with the new draw
                noise = ep.where(is_adv, noise, draw)
                is_adv = is_adv.logical_or(draw_is_adv)
            else:
                noise = draw
                is_adv = draw_is_adv

            if is_adv.all():
                break

        if not is_adv.all():
            warnings.warn(
                f"{self.__class__.__name__} failed to draw sufficient random"
                f" inputs that are adversarial ({is_adv.sum()} / {N}).")

        weights = np.linspace(0, 1, num=self.steps + 1, dtype=np.float32)
        best = ep.ones(x, (N, ))

        for weight in weights:
            blended = (1 - weight) * x + weight * noise
            # TODO: due to limited floating point precision, clipping can be required
            blended_is_adv = is_adversarial(blended)

            # weights are increasing, so the minimum keeps the first success
            successful_weight = ep.where(blended_is_adv, weight.item(), 1.0)
            best = ep.minimum(successful_weight, best)

            if (best < 1).all():
                break

        best = atleast_kd(best, x.ndim)
        return restore_type((1 - best) * x + best * noise)
Exemplo n.º 20
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Iteratively search for an adversarial perturbation with small L2 norm.

        In every step the perturbation ``delta`` is moved along the processed
        gradient of the cross-entropy loss, rescaled to the per-sample norm
        ``epsilon`` and clipped so that ``x + delta`` stays within the model
        bounds. ``epsilon`` is multiplied by ``1 - gamma`` for samples that
        are currently adversarial and by ``1 + gamma`` otherwise.

        Returns ``x + best_delta``, where ``best_delta`` is, per sample, the
        adversarial perturbation with the smallest L2 norm seen at the start
        of any step (all zeros if none was found).

        ``early_stop`` is not used in this method; any extra keyword
        arguments are passed to ``raise_if_kwargs``.

        Raises
        ------
        ValueError
            If the criterion is neither a Misclassification nor a
            TargetedMisclassification, or if its classes do not have
            shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        stepsize = 1.0
        min_, max_ = model.bounds

        def loss_fn(inputs: ep.Tensor,
                    labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            """Return the summed cross-entropy (negated if targeted) and the logits."""
            logits = model(inputs)

            # targeted: descend on the loss w.r.t. the target classes,
            # untargeted: ascend on the loss w.r.t. the labels
            sign = -1.0 if targeted else 1.0
            loss = sign * ep.crossentropy(logits, labels).sum()

            return loss, logits

        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        delta = ep.zeros_like(x)

        epsilon = self.init_epsilon * ep.ones(x, len(x))
        # largest perturbation norm considered: per element, the distance
        # from x to the farther of the two bounds
        worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

        best_l2 = worst_norm
        best_delta = delta
        # NOTE(review): adv_found is updated below but never read in this method
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of LR starting from 1.0 to 0.01
            # NOTE(review): the recurrence uses the previous stepsize rather
            # than the initial one, so the decay compounds -- confirm intended
            stepsize = (0.01 + (stepsize - 0.01) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)

            x_adv = x + delta

            _, logits, gradients = grad_and_logits(x_adv, classes)
            # presumably rescales each sample's gradient to unit L2 norm
            # (helper defined elsewhere) -- TODO confirm
            gradients = normalize_gradient_l2_norms(gradients)
            is_adversarial = criterion_(x_adv, logits)

            # remember the current delta if it is adversarial and not larger
            # than the best one so far
            l2 = ep.norms.l2(flatten(delta), axis=-1)
            is_smaller = l2 <= best_l2

            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_l2 = ep.where(is_both, l2, best_l2)

            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # do step
            delta = delta + stepsize * gradients

            # shrink the norm budget for adversarial samples, grow it otherwise
            epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma,
                                         1.0 + self.gamma)
            epsilon = ep.minimum(epsilon, worst_norm)

            # project to epsilon ball
            # (rescales delta to norm epsilon; no guard against a zero norm)
            delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1),
                                x.ndim)

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta

        return restore_type(x_adv)
Exemplo n.º 21
0
    def __call__(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
    ) -> T:
        """Run the shrinkage-thresholding attack with a binary search over ``consts``.

        For each binary search step the inner loop takes a gradient step from
        the extrapolated point ``y_k``, applies
        ``project_shrinkage_thresholding`` to obtain ``x_k`` and extrapolates
        again. Adversarial iterates are fed to ``apply_decision_rule`` to keep
        the best one per sample. After the inner loop, ``consts`` is multiplied
        by ten while no upper bound is known and bisected afterwards.

        Args:
            model: Callable model exposing ``bounds``.
            inputs: Batch of clean inputs.
            criterion: ``Misclassification`` or ``TargetedMisclassification``
                (or whatever ``get_criterion`` converts into one of them).

        Returns:
            The best adversarials found, in the tensor type of ``inputs``.
            Samples without any adversarial keep the initial all-zero entry.

        Raises:
            ValueError: If the criterion type is unsupported or the class
                tensor does not have shape ``(N,)``.
        """
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
            change_classes_logits = self.confidence
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
            change_classes_logits = -self.confidence
        else:
            raise ValueError("unsupported criterion")

        def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            """Apply the criterion after shifting the class logits by the confidence."""
            if change_classes_logits != 0:
                # out-of-place so the caller's logits are never modified
                logits = logits + ep.onehot_like(
                    logits, classes, value=change_classes_logits
                )
            return criterion_(perturbed, logits)

        if classes.shape != (N,):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}"
            )

        min_, max_ = model.bounds
        rows = range(N)

        def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            """Return (consts-weighted hinge loss + squared L2 distance, logits)."""
            assert y_k.shape == x.shape
            assert consts.shape == (N,)

            logits = model(y_k)

            if targeted:
                c_minimize = best_other_classes(logits, classes)
                c_maximize = classes
            else:
                c_minimize = classes
                c_maximize = best_other_classes(logits, classes)

            is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert is_adv_loss.shape == (N,)

            is_adv_loss = is_adv_loss + self.confidence
            is_adv_loss = ep.maximum(0, is_adv_loss)
            is_adv_loss = is_adv_loss * consts

            squared_norms = flatten(y_k - x).square().sum(axis=-1)
            loss = is_adv_loss.sum() + squared_norms.sum()
            return loss, logits

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        consts = self.initial_const * ep.ones(x, (N,))
        lower_bounds = ep.zeros(x, (N,))
        upper_bounds = ep.inf * ep.ones(x, (N,))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.ones(x, (N,)) * ep.inf

        # progress is checked after each tenth of the iterations
        check_every = math.ceil(self.steps / 10)

        # the binary search searches for the smallest consts that produce adversarials
        for binary_search_step in range(self.binary_search_steps):
            if (
                binary_search_step == self.binary_search_steps - 1
                and self.binary_search_steps >= 10
            ):
                # in the last iteration, repeat the search once
                consts = ep.minimum(upper_bounds, 1e10)

            # restart the optimization from the clean inputs
            x_k = x
            y_k = x

            found_advs = ep.full(
                x, (N,), value=False
            ).bool()  # found adv with the current consts
            loss_at_previous_check = ep.ones(x, (1,)) * ep.inf

            for iteration in range(self.steps):
                # square-root learning rate decay
                stepsize = self.initial_stepsize * (1.0 - iteration / self.steps) ** 0.5

                loss, _, gradient = loss_aux_and_grad(y_k, consts)

                x_k_old = x_k
                x_k = project_shrinkage_thresholding(
                    y_k - stepsize * gradient, x, self.regularization, min_, max_
                )
                y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old)

                if self.abort_early and iteration % check_every == 0:
                    # stop optimization if there has been no progress
                    if not ep.all(loss <= 0.9999 * loss_at_previous_check):
                        break
                    loss_at_previous_check = loss

                # The logits returned by the loss belong to the previous y_k,
                # not to the freshly projected x_k, so x_k is evaluated on its
                # own before deciding whether it is adversarial.
                found_advs_iter = is_adversarial(x_k, model(x_k))

                best_advs, best_advs_norms = apply_decision_rule(
                    self.decision_rule,
                    self.regularization,
                    best_advs,
                    best_advs_norms,
                    x_k,
                    x,
                    found_advs_iter,
                )

                found_advs = ep.logical_or(found_advs, found_advs_iter)

            upper_bounds = ep.where(found_advs, consts, upper_bounds)
            lower_bounds = ep.where(found_advs, lower_bounds, consts)

            consts_exponential_search = consts * 10
            consts_binary_search = (lower_bounds + upper_bounds) / 2
            consts = ep.where(
                ep.isinf(upper_bounds), consts_exponential_search, consts_binary_search
            )

        return restore_type(best_advs)
Exemplo n.º 22
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        target_classes=None,
        binary_search_steps=9,
        max_iterations=10000,
        confidence=0,
        learning_rate=1e-2,
        initial_const=1e-3,
        abort_early=True,
    ):
        """Optimize a perturbation in attack space with Adam and a search over consts.

        The attack is targeted when ``target_classes`` is given (``labels`` is
        then ignored) and untargeted otherwise. For every binary search step a
        fresh perturbation is optimized for at most ``max_iterations``
        iterations; the closest adversarial seen per sample is kept. Between
        steps, ``consts`` is multiplied by ten while no upper bound is known
        and bisected afterwards.

        Returns the raw tensor of best adversarials; entries without any
        adversarial stay at their all-zero initial value.
        """
        x = ep.astensor(inputs)
        N = len(x)

        targeted = target_classes is not None
        if not targeted:
            labels = ep.astensor(labels)
            assert labels.shape == (N, )
            is_adv = partial(untargeted_is_adv,
                             labels=labels,
                             confidence=confidence)
        else:
            labels = None
            target_classes = ep.astensor(target_classes)
            assert target_classes.shape == (N, )
            is_adv = partial(targeted_is_adv,
                             target_classes=target_classes,
                             confidence=confidence)

        model_bounds = self.model.bounds()
        to_attack_space = partial(_to_attack_space, bounds=model_bounds)
        to_model_space = partial(_to_model_space, bounds=model_bounds)

        x_attack = to_attack_space(x)
        # round trip through attack space; used as the distance reference
        x_roundtrip = to_model_space(x_attack)

        sample_idx = np.arange(N)

        def loss_fun(delta: ep.Tensor, consts: ep.Tensor):
            """Return ``(loss, (model-space candidate, logits))`` for ``delta``."""
            assert delta.shape == x_attack.shape
            assert consts.shape == (N, )

            candidate = to_model_space(x_attack + delta)
            logits = ep.astensor(self.model.forward(candidate.tensor))

            if targeted:
                suppress = best_other_classes(logits, target_classes)
                promote = target_classes
            else:
                suppress = labels
                promote = best_other_classes(logits, labels)

            margin = logits[sample_idx, suppress] - logits[sample_idx, promote]
            assert margin.shape == (N, )
            hinge = ep.maximum(0, margin + confidence) * consts

            sq_dist = flatten(candidate - x_roundtrip).square().sum(axis=-1)
            return hinge.sum() + sq_dist.sum(), (candidate, logits)

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        consts = initial_const * np.ones((N, ))
        lower_bounds = np.zeros((N, ))
        upper_bounds = np.inf * np.ones((N, ))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.ones(x, (N, )) * np.inf

        # progress is checked after each tenth of the iterations
        check_every = np.ceil(max_iterations / 10)

        # the binary search looks for the smallest consts that yield adversarials
        for binary_search_step in range(binary_search_steps):
            is_final_step = binary_search_step == binary_search_steps - 1
            if is_final_step and binary_search_steps >= 10:
                # in the last iteration, repeat the search once
                consts = np.minimum(upper_bounds, 1e10)

            # fresh perturbation and optimizer state for this set of consts
            delta = ep.zeros_like(x_attack)
            optimizer = AdamOptimizer(delta)

            # per-sample flag: adversarial found with the current consts
            found_advs = np.full((N, ), fill_value=False)
            previous_loss = np.inf

            consts_ = ep.from_numpy(x, consts.astype(np.float32))

            for iteration in range(max_iterations):
                loss, aux, gradient = loss_aux_and_grad(delta, consts_)
                perturbed, logits = aux
                delta += optimizer(gradient, learning_rate)

                if abort_early and iteration % check_every == 0:
                    # stop Adam if there has been no progress
                    if not (loss <= 0.9999 * previous_loss):
                        break
                    previous_loss = loss

                adv_now = is_adv(logits)
                found_advs = np.logical_or(found_advs, adv_now.numpy())

                norms = flatten(perturbed - x).square().sum(axis=-1).sqrt()
                # 1.0 where the candidate is adversarial and closer, else 0.0
                improved = (norms < best_advs_norms).float32() * adv_now.float32()
                improved_kd = atleast_kd(improved, best_advs.ndim)

                best_advs = (improved_kd * perturbed +
                             (1 - improved_kd) * best_advs)
                best_advs_norms = (improved * norms +
                                   (1 - improved) * best_advs_norms)

            upper_bounds = np.where(found_advs, consts, upper_bounds)
            lower_bounds = np.where(found_advs, lower_bounds, consts)

            consts = np.where(np.isinf(upper_bounds), consts * 10,
                              (lower_bounds + upper_bounds) / 2)

        return best_advs.tensor
Exemplo n.º 23
0
def test_max_axes(dummy: Tensor) -> Tensor:
    """Reduce a (3, 5, 2) float32 tensor of ones with ``ep.max`` over axes 0 and 1."""
    ones = ep.ones(dummy, 30)
    cube = ones.float32().reshape((3, 5, 2))
    reduced = ep.max(cube, axis=(0, 1))
    return reduced
Exemplo n.º 24
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        starting_points: Optional[ep.Tensor] = None,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Iteratively search for adversarial perturbations with a small ``self.p``-norm.

        If ``starting_points`` or ``self.init_attack`` is available, a binary
        search between the inputs and those points (via ``self.mid_points``)
        provides the initial perturbation; otherwise the search starts from a
        zero perturbation. Each iteration then updates the per-sample budget
        ``epsilon``, takes a normalized gradient step, projects with
        ``self.project`` and clips to the model bounds.

        Args:
            model: Callable model exposing ``bounds``.
            inputs: Batch of clean inputs.
            criterion: ``Misclassification`` or ``TargetedMisclassification``
                (or whatever ``get_criterion`` converts into one of them).
            starting_points: Optional points that must all be adversarial
                (checked with an ``assert``).
            early_stop: Not read anywhere in this method.
            **kwargs: Must be empty (enforced by ``raise_if_kwargs``).

        Returns:
            ``inputs`` plus the best perturbation recorded, in the tensor type
            of ``inputs``.

        Raises:
            ValueError: If the criterion type is unsupported or the class
                tensor does not have shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        criterion_ = get_criterion(criterion)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        # NOTE: loss_fn reads ``rows`` from the enclosing scope; ``rows`` is only
        # bound further down, before loss_fn is first called.
        def loss_fn(
            inputs: ep.Tensor, labels: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            """Return the negated summed logit difference plus (logits, per-sample difference)."""

            logits = model(inputs)

            if targeted:
                c_minimize = best_other_classes(logits, labels)
                c_maximize = labels  # target_classes
            else:
                c_minimize = labels  # labels
                c_maximize = best_other_classes(logits, labels)

            loss = logits[rows, c_minimize] - logits[rows, c_maximize]

            return -loss.sum(), (logits, loss)

        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs
        N = len(x)

        # start from initialization points/attack
        if starting_points is not None:
            x1 = starting_points
        else:
            if self.init_attack is not None:
                x1 = self.init_attack.run(model, x, criterion_)
            else:
                x1 = None

        # if initial points or initialization attacks are provided,
        #   search for the boundary
        if x1 is not None:
            is_adv = get_is_adversarial(criterion_, model)
            assert is_adv(x1).all()
            lower_bound = ep.zeros(x, shape=(N, ))
            upper_bound = ep.ones(x, shape=(N, ))
            # bisection: upper_bound always stays at an adversarial blend factor
            for _ in range(self.binary_search_steps):
                epsilons = (lower_bound + upper_bound) / 2
                mid_points = self.mid_points(x, x1, epsilons, model.bounds)
                is_advs = is_adv(mid_points)
                lower_bound = ep.where(is_advs, lower_bound, epsilons)
                upper_bound = ep.where(is_advs, epsilons, upper_bound)
            starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
            delta = starting_points - x
        else:
            # start from x0
            delta = ep.zeros_like(x)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds
        rows = range(N)
        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        # initial budget: unbounded for p != 0; for p == 0 either 1 or the
        # l0 norm of the initial perturbation
        if self.p != 0:
            epsilon = ep.inf * ep.ones(x, len(x))
        else:
            epsilon = ep.ones(x, len(x)) if x1 is None \
                else ep.norms.l0(flatten(delta), axis=-1)
        # upper limit for epsilon: for p != 0 the lp norm of the largest
        # distance to the bounds, for p == 0 the number of input elements
        if self.p != 0:
            worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)),
                                     p=self.p,
                                     axis=-1)
        else:
            worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

        best_lp = worst_norm
        best_delta = delta
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of learning rates
            stepsize = (self.min_stepsize +
                        (self.max_stepsize - self.min_stepsize) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)
            # gamma follows the same cosine shape, from self.gamma towards 0.001
            gamma = (0.001 + (self.gamma - 0.001) *
                     (1 + math.cos(math.pi * (i / self.steps))) / 2)

            x_adv = x + delta

            loss, (logits,
                   loss_batch), gradients = grad_and_logits(x_adv, classes)
            is_adversarial = criterion_(x_adv, logits)

            # keep the perturbation if it is adversarial and not larger than the best
            lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
            is_smaller = lp <= best_lp
            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_lp = ep.where(is_both, lp, best_lp)
            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # update epsilon
            if self.p != 0:
                # first-order estimate: |logit difference| / dual norm of the gradient
                distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                    flatten(gradients), p=self.dual, axis=-1)
                # adversarial: shrink (never above the best norm);
                # not adversarial: grow if an adversarial was seen before,
                # otherwise use the current norm plus the distance estimate
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        epsilon * (1 - gamma),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.where(
                        adv_found, epsilon * (1 + gamma),
                        ep.norms.lp(flatten(delta), p=self.p, axis=-1) +
                        distance_to_boundary))
            else:
                # p == 0: epsilon moves in whole units, by at least one per step
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        ep.minimum(epsilon - 1,
                                   (epsilon * (1 - gamma)).astype(int).astype(
                                       epsilon.dtype)),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.maximum(epsilon + 1,
                               (epsilon * (1 + gamma)).astype(int).astype(
                                   epsilon.dtype)))
                epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

            # clip epsilon
            epsilon = ep.minimum(epsilon, worst_norm)

            # computes normalized gradient update
            grad_ = self.normalize(gradients, x=x,
                                   bounds=model.bounds) * stepsize

            # do step
            delta = delta + grad_

            # project according to the given norm
            delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta
        return restore_type(x_adv)
Exemplo n.º 25
0
    def __call__(self, inputs, labels, *, criterion, steps=1000):
        """Search for adversarials by adding salt-and-pepper noise of varying density.

        In each of ``steps`` iterations a fraction ``p`` of the noise positions
        is pushed up ("salt") or down ("pepper") by the full value range and
        the result is clipped to the model bounds. Whenever a perturbed input
        is adversarial and closer to the original than the best one so far, it
        is stored and the probability range searched for that sample is
        narrowed around ``p``.

        Args:
            inputs: Batch of clean inputs.
            labels: Labels forwarded to ``criterion``.
            criterion: Callable ``(originals, labels, perturbed, logits)``
                returning a per-sample boolean tensor.
            steps: Number of noise draws.

        Returns:
            Raw tensor with the closest adversarial found per sample; samples
            without one keep their original input.
        """
        x0 = ep.astensor(inputs)
        labels = ep.astensor(labels)

        def is_adversarial(p: ep.Tensor) -> ep.Tensor:
            """For each input in p, returns true if it is an adversarial for
            the given model and criterion"""
            logits = ep.astensor(self.model.forward(p.tensor))
            return criterion(x0, labels, p, logits)

        N = len(x0)
        # the noise has size 1 along the channel axis (if one is configured),
        # so it broadcasts across channels
        shape = list(x0.shape)
        if self.channel_axis is not None:
            shape[self.channel_axis] = 1

        min_, max_ = self.model.bounds()
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # inputs that are already adversarial have distance zero
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N), ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / steps
        p = stepsizes

        for step in range(steps):
            # add salt and pepper
            u = ep.uniform(x0, shape)
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials; the distance is the L2
            # norm of the perturbation (x - x0), which matches the zero
            # distance assigned above to inputs that are already adversarial
            norms = flatten(x - x0).square().sum(axis=-1).sqrt()
            closer = norms < best_advs_norms
            is_adv = is_adversarial(x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(
                is_best_adv, ep.minimum(p * 1.2, 1.0), max_probability
            )
            remaining = steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining, stepsizes
            )
            # restart from min_probability after an improvement or once p
            # has reached the top of its range
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return result.tensor
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        **kwargs: Any,
    ) -> T:
        """Run the boundary-style attack while periodically training a surrogate model.

        Starting from adversarial points (given, or produced by an init
        attack), each step draws candidates with ``draw_proposals`` and keeps
        those that are adversarial and closer to the originals. Step sizes are
        adapted from running success statistics. Adversarial candidates and
        non-adversarial spherical candidates are collected in ``self.class_1``
        and ``self.class_2``; every 500 steps a binary surrogate classifier is
        trained on them and passed to ``draw_proposals`` from then on.

        Args:
            model: Model under attack; must expose ``bounds`` and ``device``.
            inputs: Batch of clean inputs.
            criterion: Criterion or labels accepted by ``get_criterion``.
            early_stop: Forwarded to the init attack only.
            starting_points: Optional adversarial starting points.
            **kwargs: Must be empty (enforced by ``raise_if_kwargs``).

        Returns:
            The best adversarials found, in the tensor type of ``inputs``.

        Raises:
            ValueError: If the starting points (given or produced by the init
                attack) are not all adversarial.
        """
        raise_if_kwargs(kwargs)
        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization.")
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            best_advs = init_attack.run(model,
                                        originals,
                                        criterion,
                                        early_stop=early_stop)
        else:
            best_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(best_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs")
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)
        # close the writer even if a step raises
        try:
            N = len(originals)
            ndim = originals.ndim
            spherical_steps = ep.ones(originals, N) * self.spherical_step
            source_steps = ep.ones(originals, N) * self.source_step

            tb.scalar("batchsize", N, 0)

            # create two queues for each sample to track success rates
            # (used to update the hyper parameters)
            stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
            stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

            bounds = model.bounds

            # training data for the surrogate: adversarial candidates vs.
            # non-adversarial spherical candidates
            self.class_1 = []
            self.class_2 = []

            self.surrogate_model = None
            surrogate_net = None  # raw network wrapped by self.surrogate_model
            device = model.device
            train_step = 500

            for step in tqdm(range(1, self.steps + 1)):
                converged = source_steps < self.source_step_convergance
                if converged.all():
                    break  # pragma: no cover
                converged = atleast_kd(converged, ndim)

                # TODO: performance: ignore those that have converged
                # (we could select the non-converged ones, but we currently
                # cannot easily invert this in the end using EagerPy)

                unnormalized_source_directions = originals - best_advs
                source_norms = ep.norms.l2(
                    flatten(unnormalized_source_directions), axis=-1)
                source_directions = unnormalized_source_directions / atleast_kd(
                    source_norms, ndim)

                # only check spherical candidates every k steps
                check_spherical_and_update_stats = (
                    step % self.update_stats_every_k == 0)

                candidates, spherical_candidates = draw_proposals(
                    bounds, originals, best_advs,
                    unnormalized_source_directions, source_directions,
                    source_norms, spherical_steps, source_steps,
                    self.surrogate_model)

                is_adv = is_adversarial(candidates)
                is_adv_spherical_candidates = is_adversarial(
                    spherical_candidates)

                # NOTE(review): ``.item()`` is used here and only the first
                # sample of each stored tensor is read during training, so the
                # data collection presumably expects a batch of one - confirm.
                if is_adv.item():
                    self.class_1.append(candidates)

                if not is_adv_spherical_candidates.item():
                    self.class_2.append(spherical_candidates)

                # ``step`` starts at 1, so this fires at train_step, 2 * train_step, ...
                if step % train_step == 0:
                    trained_net = self._train_surrogate(
                        None if self.surrogate_model is None else surrogate_net,
                        device)
                    # None means there was not yet data for both classes
                    if trained_net is not None:
                        surrogate_net = trained_net
                        self.surrogate_model = fb.PyTorchModel(surrogate_net,
                                                               bounds=(0, 1),
                                                               device=device)

                spherical_is_adv: Optional[ep.Tensor]
                if check_spherical_and_update_stats:
                    # reuse the decision computed above instead of querying
                    # the model a second time for the same candidates
                    spherical_is_adv = is_adv_spherical_candidates
                    stats_spherical_adversarial.append(spherical_is_adv)
                    # TODO: algorithm: the original implementation ignores those samples
                    # for which spherical is not adversarial and continues with the
                    # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                    # TODO: thoughts: should we always track this because we compute it anyway
                    stats_step_adversarial.append(is_adv)
                else:
                    spherical_is_adv = None

                # in theory, we are closer per construction
                # but limited numerical precision might break this
                distances = ep.norms.l2(flatten(originals - candidates),
                                        axis=-1)
                closer = distances < source_norms
                is_best_adv = ep.logical_and(is_adv, closer)
                is_best_adv = atleast_kd(is_best_adv, ndim)

                cond = converged.logical_not().logical_and(is_best_adv)
                best_advs = ep.where(cond, candidates, best_advs)

                tb.probability("converged", converged, step)
                tb.scalar("updated_stats", check_spherical_and_update_stats,
                          step)
                tb.histogram("norms", source_norms, step)
                tb.probability("is_adv", is_adv, step)
                if spherical_is_adv is not None:
                    tb.probability("spherical_is_adv", spherical_is_adv, step)
                tb.histogram("candidates/distances", distances, step)
                tb.probability("candidates/closer", closer, step)
                tb.probability("candidates/is_best_adv", is_best_adv, step)
                tb.probability("new_best_adv_including_converged",
                               is_best_adv, step)
                tb.probability("new_best_adv", cond, step)

                if check_spherical_and_update_stats:
                    full = stats_spherical_adversarial.isfull()
                    tb.probability("spherical_stats/full", full, step)
                    if full.any():
                        probs = stats_spherical_adversarial.mean()
                        # high spherical success rate: scale both step sizes
                        # by step_adaptation
                        cond1 = ep.logical_and(probs > 0.5, full)
                        spherical_steps = ep.where(
                            cond1, spherical_steps * self.step_adaptation,
                            spherical_steps)
                        source_steps = ep.where(
                            cond1, source_steps * self.step_adaptation,
                            source_steps)
                        # low spherical success rate: divide both step sizes
                        cond2 = ep.logical_and(probs < 0.2, full)
                        spherical_steps = ep.where(
                            cond2, spherical_steps / self.step_adaptation,
                            spherical_steps)
                        source_steps = ep.where(
                            cond2, source_steps / self.step_adaptation,
                            source_steps)
                        stats_spherical_adversarial.clear(
                            ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "spherical_stats/isfull/success_rate/mean", probs,
                            full, step)
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_linear", cond1, full,
                            step)
                        tb.probability_ratio(
                            "spherical_stats/isfull/too_nonlinear", cond2,
                            full, step)

                    full = stats_step_adversarial.isfull()
                    tb.probability("step_stats/full", full, step)
                    if full.any():
                        probs = stats_step_adversarial.mean()
                        # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess)
                        # instead of p(source_step_success | spherical_step_sucess) that was tracked before
                        cond1 = ep.logical_and(probs > 0.25, full)
                        source_steps = ep.where(
                            cond1, source_steps * self.step_adaptation,
                            source_steps)
                        cond2 = ep.logical_and(probs < 0.1, full)
                        source_steps = ep.where(
                            cond2, source_steps / self.step_adaptation,
                            source_steps)
                        stats_step_adversarial.clear(
                            ep.logical_or(cond1, cond2))
                        tb.conditional_mean(
                            "step_stats/isfull/success_rate/mean", probs,
                            full, step)
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_high", cond1,
                            full, step)
                        tb.probability_ratio(
                            "step_stats/isfull/success_rate_too_low", cond2,
                            full, step)

                tb.histogram("spherical_step", spherical_steps, step)
                tb.histogram("source_step", source_steps, step)
        finally:
            tb.close()
        return restore_type(best_advs)

    def _train_surrogate(self, network, device):
        """Train the binary surrogate on the collected candidates.

        Args:
            network: Network to continue training, or ``None`` to start from a
                pretrained ResNet-18 with a fresh two-class output layer.
            device: Device the data and a newly created network are moved to.

        Returns:
            The trained network in eval mode, or ``None`` if one of the two
            classes has no samples yet (training is skipped in that case).
        """
        # Concatenating an empty class with a non-empty one fails, and an
        # empty data set cannot be split, so training waits for both classes.
        if not self.class_1 or not self.class_2:
            return None

        positives = np.array([image.numpy()[0] for image in self.class_1])
        negatives = np.array([image.numpy()[0] for image in self.class_2])

        # use at most as many negatives as there are positives
        negatives = negatives[:len(positives)]
        data = np.concatenate([positives, negatives])
        labels = np.append(np.ones(len(positives)), np.zeros(len(negatives)))

        X = torch.tensor(data).to(device)
        y = torch.tensor(labels, dtype=torch.long).to(device)

        if network is None:
            network = torchvision.models.resnet18(pretrained=True)
            network.fc = torch.nn.Linear(in_features=512,
                                         out_features=2,
                                         bias=True)
            network = network.to(device)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        optimizer = torch.optim.Adam(network.parameters(), lr=3e-4)
        loss = torch.nn.CrossEntropyLoss()

        # the accuracy histories returned by ``train`` are not used
        trained, _, _ = train(network, optimizer, loss, X_train, y_train,
                              X_test, y_test)
        return trained.eval()
Exemplo n.º 27
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        epsilons: float,
        **kwargs: Any,
    ) -> T:
        """Shrink adversarial perturbations with a decision-based random walk.

        Starting from points that are already adversarial, every step draws
        candidates with ``draw_proposals`` and keeps a candidate when it is
        adversarial and closer (L2) to the original than the current best.
        The spherical and source step sizes are adapted from running success
        statistics.

        Args:
            model: Model under attack; ``model.bounds`` is forwarded to
                ``draw_proposals``.
            inputs: Original (clean) inputs.
            criterion: Criterion (or raw labels) passed to ``get_criterion``.
            early_stop: Only forwarded to the initialization attack.
            starting_points: Adversarial starting points. When ``None`` they
                are produced by ``self.init_attack`` (or a
                ``LinearSearchBlendedUniformNoiseAttack`` fallback).
            epsilons: Despite the annotation this is indexed like a sequence.
                It is only read when ``self.eps_early_stop`` is set, in which
                case it must hold exactly one value that serves as the L2
                threshold for early stopping.
            **kwargs: Must be empty; checked by ``raise_if_kwargs``.

        Returns:
            The best adversarials found, converted back to the input type.

        Raises:
            ValueError: If epsilon-based early stopping is requested with a
                number of epsilons other than one, or if the initialization
                attack / the given starting points are not adversarial for
                every input.
        """
        raise_if_kwargs(kwargs)
        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        # Validate explicitly: an ``assert`` would vanish under ``python -O``.
        if self.eps_early_stop and len(epsilons) != 1:
            raise ValueError(
                "epsilon-based early stopping only possible for one epsilon value"
            )

        verify_input_bounds(originals, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization."
                )
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            best_advs = init_attack.run(
                model, originals, criterion, early_stop=early_stop
            )
        else:
            best_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(best_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs"
                )
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)

        N = len(originals)
        # The per-sample threshold is only needed (and ``epsilons`` only
        # indexed) when epsilon-based early stopping is enabled.
        epsilon = (
            ep.astensor(epsilons[0] * ep.ones(originals, (N,)))
            if self.eps_early_stop
            else None
        )
        ndim = originals.ndim
        spherical_steps = ep.ones(originals, N) * self.spherical_step
        source_steps = ep.ones(originals, N) * self.source_step

        tb.scalar("batchsize", N, 0)

        # create two queues for each sample to track success rates
        # (used to update the hyper parameters)
        stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
        stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

        bounds = model.bounds

        for step in range(1, self.steps + 1):
            converged = source_steps < self.source_step_convergance
            if converged.all():
                break  # pragma: no cover
            converged = atleast_kd(converged, ndim)

            # TODO: performance: ignore those that have converged
            # (we could select the non-converged ones, but we currently
            # cannot easily invert this in the end using EagerPy)

            unnormalized_source_directions = originals - best_advs
            source_norms = ep.norms.l2(flatten(unnormalized_source_directions), axis=-1)
            source_directions = unnormalized_source_directions / atleast_kd(
                source_norms, ndim
            )

            # only check spherical candidates every k steps
            check_spherical_and_update_stats = step % self.update_stats_every_k == 0

            candidates, spherical_candidates = draw_proposals(
                bounds,
                originals,
                best_advs,
                unnormalized_source_directions,
                source_directions,
                source_norms,
                spherical_steps,
                source_steps,
            )

            is_adv = is_adversarial(candidates)

            spherical_is_adv: Optional[ep.Tensor]
            if check_spherical_and_update_stats:
                spherical_is_adv = is_adversarial(spherical_candidates)
                stats_spherical_adversarial.append(spherical_is_adv)
                # TODO: algorithm: the original implementation ignores those samples
                # for which spherical is not adversarial and continues with the
                # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                # TODO: thoughts: should we always track this because we compute it anyway
                stats_step_adversarial.append(is_adv)
            else:
                spherical_is_adv = None

            # in theory, we are closer per construction
            # but limited numerical precision might break this
            distances = ep.norms.l2(flatten(originals - candidates), axis=-1)
            closer = distances < source_norms
            is_best_adv = ep.logical_and(is_adv, closer)
            is_best_adv = atleast_kd(is_best_adv, ndim)

            # never overwrite samples whose source step has already converged
            cond = converged.logical_not().logical_and(is_best_adv)
            best_advs = ep.where(cond, candidates, best_advs)

            tb.probability("converged", converged, step)
            tb.scalar("updated_stats", check_spherical_and_update_stats, step)
            tb.histogram("norms", source_norms, step)
            tb.probability("is_adv", is_adv, step)
            if spherical_is_adv is not None:
                tb.probability("spherical_is_adv", spherical_is_adv, step)
            tb.histogram("candidates/distances", distances, step)
            tb.probability("candidates/closer", closer, step)
            tb.probability("candidates/is_best_adv", is_best_adv, step)
            tb.probability("new_best_adv_including_converged", is_best_adv, step)
            tb.probability("new_best_adv", cond, step)

            if check_spherical_and_update_stats:
                full = stats_spherical_adversarial.isfull()
                tb.probability("spherical_stats/full", full, step)
                if full.any():
                    probs = stats_spherical_adversarial.mean()
                    # success rate too high -> grow both step sizes
                    cond1 = ep.logical_and(probs > 0.5, full)
                    spherical_steps = ep.where(
                        cond1, spherical_steps * self.step_adaptation, spherical_steps
                    )
                    source_steps = ep.where(
                        cond1, source_steps * self.step_adaptation, source_steps
                    )
                    # success rate too low -> shrink both step sizes
                    cond2 = ep.logical_and(probs < 0.2, full)
                    spherical_steps = ep.where(
                        cond2, spherical_steps / self.step_adaptation, spherical_steps
                    )
                    source_steps = ep.where(
                        cond2, source_steps / self.step_adaptation, source_steps
                    )
                    stats_spherical_adversarial.clear(ep.logical_or(cond1, cond2))
                    tb.conditional_mean(
                        "spherical_stats/isfull/success_rate/mean", probs, full, step
                    )
                    tb.probability_ratio(
                        "spherical_stats/isfull/too_linear", cond1, full, step
                    )
                    tb.probability_ratio(
                        "spherical_stats/isfull/too_nonlinear", cond2, full, step
                    )

                full = stats_step_adversarial.isfull()
                tb.probability("step_stats/full", full, step)
                if full.any():
                    probs = stats_step_adversarial.mean()
                    # TODO: algorithm: changed the two values because we are currently tracking p(source_step_success)
                    # instead of p(source_step_success | spherical_step_success) that was tracked before
                    cond1 = ep.logical_and(probs > 0.25, full)
                    source_steps = ep.where(
                        cond1, source_steps * self.step_adaptation, source_steps
                    )
                    cond2 = ep.logical_and(probs < 0.1, full)
                    source_steps = ep.where(
                        cond2, source_steps / self.step_adaptation, source_steps
                    )
                    stats_step_adversarial.clear(ep.logical_or(cond1, cond2))
                    tb.conditional_mean(
                        "step_stats/isfull/success_rate/mean", probs, full, step
                    )
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_high", cond1, full, step
                    )
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_low", cond2, full, step
                    )

            tb.histogram("spherical_step", spherical_steps, step)
            tb.histogram("source_step", source_steps, step)

            if epsilon is not None:
                # stop as soon as every perturbation fits into the epsilon ball
                best_advs_norms = flatten(originals - best_advs).norms.l2(axis=-1)
                if (best_advs_norms <= epsilon).all():
                    print('early stopped because epsilon condition satisfied')
                    break
        tb.close()
        return restore_type(best_advs)
Exemplo n.º 28
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        starting_points: Optional[T] = None,
        epsilons: float,
        **kwargs: Any,
    ) -> T:
        """Run a decision-based attack driven by estimated gradients.

        The adversarial starting points are first projected onto the decision
        boundary with ``self._binary_search``. Each step then estimates a
        gradient direction with ``self.approximate_gradients``, moves along
        it using the configured ``self.stepsize_search`` strategy and
        measures the new distances with ``self.distance``.

        Args:
            model: Model under attack.
            inputs: Original (clean) inputs.
            criterion: Criterion (or raw labels) passed to ``get_criterion``.
            early_stop: Only forwarded to the initialization attack.
            starting_points: Adversarial starting points. When ``None`` they
                are produced by ``self.init_attack`` (or a
                ``LinearSearchBlendedUniformNoiseAttack`` fallback).
            epsilons: Despite the annotation this is indexed like a sequence.
                It is only read when ``self.eps_early_stop`` is set, in which
                case it must hold exactly one value that serves as the
                distance threshold for early stopping.
            **kwargs: Must be empty; checked by ``raise_if_kwargs``.

        Returns:
            The final adversarials, converted back to the input type.

        Raises:
            ValueError: If epsilon-based early stopping is requested with a
                number of epsilons other than one, or if the initialization
                attack / the given starting points are not adversarial for
                every input.
        """
        raise_if_kwargs(kwargs)
        originals, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        N = len(originals)
        # Validate explicitly: an ``assert`` would vanish under ``python -O``.
        if self.eps_early_stop and len(epsilons) != 1:
            raise ValueError(
                'epsilon-based early stopping only possible for one epsilon value'
            )

        # The per-sample threshold is only needed (and ``epsilons`` only
        # indexed) when epsilon-based early stopping is enabled.
        epsilon = (ep.astensor(epsilons[0] * ep.ones(originals, (N, )))
                   if self.eps_early_stop else None)
        del epsilons

        verify_input_bounds(originals, model)

        criterion = get_criterion(criterion)
        is_adversarial = get_is_adversarial(criterion, model)

        if starting_points is None:
            init_attack: MinimizationAttack
            if self.init_attack is None:
                init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
                logging.info(
                    f"Neither starting_points nor init_attack given. Falling"
                    f" back to {init_attack!r} for initialization.")
            else:
                init_attack = self.init_attack
            # TODO: use call and support all types of attacks (once early_stop is
            # possible in __call__)
            x_advs = init_attack.run(model,
                                     originals,
                                     criterion,
                                     early_stop=early_stop)
        else:
            x_advs = ep.astensor(starting_points)

        is_adv = is_adversarial(x_advs)
        if not is_adv.all():
            failed = is_adv.logical_not().float32().sum()
            if starting_points is None:
                raise ValueError(
                    f"init_attack failed for {failed} of {len(is_adv)} inputs")
            else:
                raise ValueError(
                    f"{failed} of {len(is_adv)} starting_points are not adversarial"
                )
        del starting_points

        tb = TensorBoard(logdir=self.tensorboard)

        # Project the initialization to the boundary.
        x_advs = self._binary_search(is_adversarial, originals, x_advs)

        assert ep.all(is_adversarial(x_advs))

        distances = self.distance(originals, x_advs)

        for step in range(self.steps):
            delta = self.select_delta(originals, distances, step)

            # Choose number of gradient estimation steps.
            num_gradient_estimation_steps = int(
                min(self.initial_num_evals * math.sqrt(step + 1),
                    self.max_num_evals))

            gradients = self.approximate_gradients(
                is_adversarial, x_advs, num_gradient_estimation_steps, delta)

            if self.constraint == "linf":
                update = ep.sign(gradients)
            else:
                update = gradients

            # NOTE(review): proposals below are clipped to the hard-coded
            # range [0, 1] rather than to model.bounds -- confirm intended.
            if self.stepsize_search == "geometric_progression":
                # find step size.
                step_sizes = distances / math.sqrt(step + 1)

                # Halve the step size of every sample whose proposal is not
                # adversarial until all proposals are adversarial.
                while True:
                    x_advs_proposals = ep.clip(
                        x_advs + atleast_kd(step_sizes, x_advs.ndim) * update,
                        0, 1)
                    success = is_adversarial(x_advs_proposals)
                    step_sizes = ep.where(success, step_sizes,
                                          step_sizes / 2.0)

                    if ep.all(success):
                        break

                # Update the sample.
                x_advs = ep.clip(
                    x_advs + atleast_kd(step_sizes, update.ndim) * update, 0,
                    1)

                assert ep.all(is_adversarial(x_advs))

                # Binary search to return to the boundary.
                x_advs = self._binary_search(is_adversarial, originals, x_advs)

                assert ep.all(is_adversarial(x_advs))

            elif self.stepsize_search == "grid_search":
                # Grid search for stepsize: 20 log-spaced fractions of the
                # current distance, one row per fraction.
                epsilons_grid = ep.expand_dims(
                    ep.from_numpy(
                        distances,
                        np.logspace(
                            -4, 0, num=20, endpoint=True, dtype=np.float32),
                    ),
                    1,
                ) * ep.expand_dims(distances, 0)

                proposals_list = []

                for grid_step_sizes in epsilons_grid:
                    x_advs_proposals = (
                        x_advs +
                        atleast_kd(grid_step_sizes, update.ndim) * update)
                    x_advs_proposals = ep.clip(x_advs_proposals, 0, 1)

                    mask = is_adversarial(x_advs_proposals)

                    x_advs_proposals = self._binary_search(
                        is_adversarial, originals, x_advs_proposals)

                    # only use new values where initial guess was already adversarial
                    x_advs_proposals = ep.where(atleast_kd(mask, x_advs.ndim),
                                                x_advs_proposals, x_advs)

                    proposals_list.append(x_advs_proposals)

                proposals = ep.stack(proposals_list, 0)
                proposals_distances = self.distance(
                    ep.expand_dims(originals, 0), proposals)
                minimal_idx = ep.argmin(proposals_distances, 0)

                # NOTE(review): if minimal_idx holds one grid index per
                # sample, indexing only the first axis does not select the
                # per-sample best proposal (a paired batch index would be
                # needed) -- verify the resulting shape against
                # self.distance.
                x_advs = proposals[minimal_idx]

            distances = self.distance(originals, x_advs)
            # log stats
            tb.histogram("norms", distances, step)
            if epsilon is not None and (distances <= epsilon).all():
                print('early stopped because epsilon condition satisfied')
                break

        # Release the TensorBoard writer once the attack has finished.
        tb.close()
        return restore_type(x_advs)
Exemplo n.º 29
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        target_classes=None,
        binary_search_steps=9,
        max_iterations=10000,
        confidence=0,
        initial_learning_rate=1e-2,
        regularization=1e-2,
        initial_const=1e-3,
        abort_early=True,
        decision_rule="EN",
    ):
        """Run an elastic-net style attack with an outer search over consts.

        The outer loop searches (exponentially, then by bisection) for the
        smallest per-sample constant that still yields adversarials. The
        inner loop minimizes ``const * margin_loss + squared L2 distance``
        with accelerated proximal gradient steps, where
        ``project_shrinkage_thresholding`` is applied after each gradient
        step.

        Args:
            inputs: Batch of original inputs.
            labels: True labels; used only for the untargeted attack.
            target_classes: If given, run a targeted attack towards these
                classes and ignore ``labels``.
            binary_search_steps: Number of outer search steps over consts.
            max_iterations: Maximum inner optimization steps per const.
            confidence: Margin added to the logit difference.
            initial_learning_rate: Step size at iteration 0; decays with the
                square root of the remaining fraction of iterations.
            regularization: Forwarded to ``project_shrinkage_thresholding``
                and ``apply_decision_rule``.
            initial_const: Starting value for every per-sample const.
            abort_early: Stop the inner loop when the loss stops improving
                (checked every tenth of ``max_iterations``).
            decision_rule: ``"EN"`` or ``"L1"``; forwarded to
                ``apply_decision_rule``.

        Returns:
            The native tensor of best adversarials tracked by
            ``apply_decision_rule`` (initialized to zeros).
        """
        x_0 = ep.astensor(inputs)
        N = len(x_0)

        assert decision_rule in ("EN", "L1")

        targeted = target_classes is not None
        if targeted:
            labels = None
            target_classes = ep.astensor(target_classes)
            assert target_classes.shape == (N, )
            is_adv = partial(targeted_is_adv,
                             target_classes=target_classes,
                             confidence=confidence)
        else:
            labels = ep.astensor(labels)
            assert labels.shape == (N, )
            is_adv = partial(untargeted_is_adv,
                             labels=labels,
                             confidence=confidence)

        min_, max_ = self.model.bounds()

        rows = np.arange(N)

        def loss_fun(y_k: ep.Tensor, consts: ep.Tensor):
            """Return ``(loss, (y_k, logits))`` for the smooth objective."""
            assert y_k.shape == x_0.shape
            assert consts.shape == (N, )

            logits = ep.astensor(self.model.forward(y_k.tensor))

            if targeted:
                c_minimize = best_other_classes(logits, target_classes)
                c_maximize = target_classes
            else:
                c_minimize = labels
                c_maximize = best_other_classes(logits, labels)

            # hinge on the logit margin, weighted per sample by its const
            is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert is_adv_loss.shape == (N, )
            is_adv_loss = is_adv_loss + confidence
            is_adv_loss = ep.maximum(0, is_adv_loss)
            is_adv_loss = is_adv_loss * consts

            squared_norms = flatten(y_k - x_0).square().sum(axis=-1)
            loss = is_adv_loss.sum() + squared_norms.sum()
            return loss, (y_k, logits)

        loss_aux_and_grad = ep.value_and_grad_fn(x_0, loss_fun, has_aux=True)

        consts = initial_const * np.ones((N, ))
        lower_bounds = np.zeros((N, ))
        upper_bounds = np.inf * np.ones((N, ))

        best_advs = ep.zeros_like(x_0)
        best_advs_norms = ep.ones(x_0, (N, )) * np.inf

        # the binary search searches for the smallest consts that produce adversarials
        for binary_search_step in range(binary_search_steps):
            if (binary_search_step == binary_search_steps - 1
                    and binary_search_steps >= 10):
                # in the last iteration, repeat the search once
                consts = np.minimum(upper_bounds, 1e10)

            # restart the optimization from the original inputs
            # TODO: rewrite this once eagerpy supports .copy()
            x_k = x_0  # ep.zeros_like(x_0) + x_0
            y_k = x_0  # ep.zeros_like(x_0) + x_0

            found_advs = np.full(
                (N, ), fill_value=False)  # found adv with the current consts
            loss_at_previous_check = np.inf

            consts_ = ep.from_numpy(x_0, consts.astype(np.float32))

            for iteration in range(max_iterations):
                # square-root learning rate decay
                learning_rate = (initial_learning_rate *
                                 (1.0 - iteration / max_iterations)**0.5)

                # Accelerated proximal gradient (FISTA): the gradient is
                # taken at the extrapolated point y_k, the same point the
                # proximal step below starts from.
                loss, (_, logits), gradient = loss_aux_and_grad(y_k, consts_)

                x_k_old = x_k
                x_k = project_shrinkage_thresholding(
                    y_k - learning_rate * gradient, x_0, regularization, min_,
                    max_)
                # Momentum extrapolation: the k / (k + 3) coefficient scales
                # the last displacement (it must multiply, not be added).
                y_k = x_k + iteration / (iteration + 3) * (x_k - x_k_old)

                if abort_early and iteration % (np.ceil(
                        max_iterations / 10)) == 0:
                    # after each tenth of the iterations, check progress
                    if not (loss <= 0.9999 * loss_at_previous_check):
                        break  # stop the optimization if there has been no progress
                    loss_at_previous_check = loss

                # NOTE(review): logits were computed before this step's
                # update, so they describe the point fed to the model above
                # rather than the freshly updated x_k stored below -- confirm
                # whether an extra forward pass on x_k is wanted.
                found_advs_iter = is_adv(logits)

                best_advs, best_advs_norms = apply_decision_rule(
                    decision_rule,
                    regularization,
                    best_advs,
                    best_advs_norms,
                    x_k,
                    x_0,
                    found_advs_iter,
                )

                found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

            # success -> const becomes the new upper bound, else the lower one
            upper_bounds = np.where(found_advs, consts, upper_bounds)
            lower_bounds = np.where(found_advs, lower_bounds, consts)

            # grow tenfold until an upper bound exists, then bisect
            consts_exponential_search = consts * 10
            consts_binary_search = (lower_bounds + upper_bounds) / 2
            consts = np.where(np.isinf(upper_bounds),
                              consts_exponential_search, consts_binary_search)

        return best_advs.tensor
Exemplo n.º 30
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Misclassification,
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for minimal salt-and-pepper noise that is adversarial.

        For ``self.steps`` iterations, a fraction ``p`` of the entries is set
        to the upper (salt) or lower (pepper) model bound. Whenever a noisy
        sample is adversarial and has a smaller L2 perturbation than the best
        one so far, it is stored and the per-sample search interval for ``p``
        is narrowed around the successful value.

        Args:
            model: Model under attack; its ``bounds`` define the noise range.
            inputs: Original (clean) inputs.
            criterion: Criterion passed to ``get_criterion``.
            early_stop: Accepted for interface compatibility; not used here.
            **kwargs: Must be empty; checked by ``raise_if_kwargs``.

        Returns:
            The closest adversarials found, converted back to the input
            type. Samples for which nothing was found keep their original
            input.
        """
        raise_if_kwargs(kwargs)
        x0, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        is_adversarial = get_is_adversarial(criterion_, model)

        N = len(x0)
        shape = list(x0.shape)

        # With across_channels the random mask has size 1 along the channel
        # axis, so all channels of a position share the same noise decision.
        if self.across_channels and x0.ndim > 2:
            if self.channel_axis is None:
                channel_axis = get_channel_axis(model, x0.ndim)
            else:
                channel_axis = self.channel_axis % x0.ndim
            if channel_axis is not None:
                shape[channel_axis] = 1

        min_, max_ = model.bounds
        r = max_ - min_

        result = x0
        is_adv = is_adversarial(result)
        # inputs that are already adversarial have a perturbation norm of 0
        best_advs_norms = ep.where(is_adv, ep.zeros(x0, N),
                                   ep.full(x0, N, ep.inf))
        min_probability = ep.zeros(x0, N)
        max_probability = ep.ones(x0, N)
        stepsizes = max_probability / self.steps
        p = stepsizes

        for step in range(self.steps):
            # add salt and pepper
            u = ep.uniform(x0, tuple(shape))
            p_ = atleast_kd(p, x0.ndim)
            salt = (u >= 1 - p_ / 2).astype(x0.dtype) * r
            pepper = -(u < p_ / 2).astype(x0.dtype) * r
            x = x0 + salt + pepper
            x = ep.clip(x, min_, max_)

            # check if we found new best adversarials; the norm has to be
            # that of the perturbation (x - x0), not of the noisy input, to
            # be comparable with best_advs_norms
            norms = flatten(x - x0).norms.l2(axis=-1)
            closer = norms < best_advs_norms
            is_adv = is_adversarial(
                x)  # TODO: ignore those that are not closer anyway
            is_best_adv = ep.logical_and(is_adv, closer)

            # update results and search space
            result = ep.where(atleast_kd(is_best_adv, x.ndim), x, result)
            best_advs_norms = ep.where(is_best_adv, norms, best_advs_norms)
            min_probability = ep.where(is_best_adv, 0.5 * p, min_probability)
            # we set max_probability a bit higher than p because the relationship
            # between p and norms is not strictly monotonic
            max_probability = ep.where(is_best_adv, ep.minimum(p * 1.2, 1.0),
                                       max_probability)
            remaining = self.steps - step
            stepsizes = ep.where(
                is_best_adv, (max_probability - min_probability) / remaining,
                stepsizes)
            # restart from the lower end after a success or once p has hit
            # the upper end of its interval
            reset = p == max_probability
            p = ep.where(ep.logical_or(is_best_adv, reset), min_probability, p)
            p = ep.minimum(p + stepsizes, max_probability)

        return restore_type(result)