示例#1
0
    def __call__(self, model: Model, inputs: T,
                 criterion: Union[Misclassification, T]) -> T:
        """Compute a virtual adversarial perturbation of L2 size ``self.epsilon``.

        A random search direction is refined for ``self.iterations`` rounds.
        In every round the direction is rescaled to L2 norm ``self.xi`` and
        then replaced by the gradient (w.r.t. the perturbation) of the KL
        divergence between the model's logits on the clean inputs and its
        logits on the perturbed inputs.  The final direction is rescaled to
        L2 norm ``self.epsilon``, added to the inputs and clipped to the
        model bounds.

        Args:
            model: Callable returning logits; ``model.bounds`` is read as a
                ``(lower, upper)`` pair.
            inputs: Batch of inputs; the first axis is the batch axis.  Any
                number of trailing axes is supported.
            criterion: A ``Misclassification`` instance or a value that
                ``get_criterion`` converts into one.  The labels are only
                shape-checked; they are not used by the search itself.

        Returns:
            The perturbed inputs, converted back to the type of ``inputs``.

        Raises:
            ValueError: If the criterion is not a ``Misclassification`` or the
                labels do not have shape ``(N,)``.
            RuntimeError: If the gradient norm of any sample drops below
                ``1e-64`` during the search.
        """
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            classes = criterion_.labels
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            raise ValueError(
                f"expected labels to have shape ({N},), got {classes.shape}")

        bounds = model.bounds

        def loss_fun(delta: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            # summed KL divergence between the given logits and the logits
            # the model produces for x + delta
            assert x.shape[0] == logits.shape[0]
            assert delta.shape == x.shape

            x_hat = x + delta
            logits_hat = model(x_hat)
            loss = ep.kl_div_with_logits(logits, logits_hat).sum()

            return loss

        value_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=False)

        clean_logits = model(x)

        # start with random vector as search vector
        d = ep.normal(x, shape=x.shape, mean=0, stddev=1)
        for _ in range(self.iterations):
            # rescale the proposal so that every sample has L2 norm xi
            d = d * self.xi / atleast_kd(ep.norms.l2(flatten(d), axis=-1),
                                         x.ndim)

            # use gradient of KL divergence as new search vector
            _, grad = value_and_grad(d, clean_logits)
            d = grad

            # rescale search vector
            d = (bounds[1] - bounds[0]) * d

            if ep.any(ep.norms.l2(flatten(d), axis=-1) < 1e-64):
                raise RuntimeError(
                    "Gradient vanished; this can happen if xi is too small.")

        # Per-sample L2 norm over all non-batch axes.  Using flatten +
        # atleast_kd (as in the loop above) instead of a hard-coded
        # axis=(1, 2, 3) makes this work for inputs of any rank, not only
        # 4-D batches.
        d_norms = atleast_kd(ep.norms.l2(flatten(d), axis=-1), x.ndim)
        final_delta = self.epsilon / d_norms * d
        x_adv = ep.clip(x + final_delta, *bounds)

        return restore_type(x_adv)
示例#2
0
def test_value_and_grad_fn(dummy: Tensor) -> None:
    """``ep.value_and_grad_fn`` yields the value and gradient of a sum of squares.

    The test is skipped when ``dummy`` is an ``ep.NumPyTensor``.
    """
    if isinstance(dummy, ep.NumPyTensor):
        pytest.skip()

    def sum_of_squares(x: ep.Tensor) -> ep.Tensor:
        squared = x.square()
        return squared.sum()

    # the values 0..7 arranged as a 2x4 float32 tensor
    t = ep.arange(dummy, 8).float32().reshape((2, 4))

    value_and_grad = ep.value_and_grad_fn(dummy, sum_of_squares)
    value, grad = value_and_grad(t)

    # 0^2 + 1^2 + ... + 7^2 == 140, and d/dt sum(t^2) == 2t
    assert value.item() == 140
    assert (grad == 2 * t).all()
示例#3
0
def test_value_and_grad_fn_with_aux(dummy: Tensor) -> None:
    """``ep.value_and_grad_fn(..., has_aux=True)`` also passes the aux output through.

    The test is skipped when ``dummy`` is an ``ep.NumPyTensor``.
    """
    if isinstance(dummy, ep.NumPyTensor):
        pytest.skip()

    def sum_of_squares_with_aux(x: Tensor) -> Tuple[Tensor, Tensor]:
        # returns (scalar loss, element-wise squares as auxiliary output)
        squared = x.square()
        return squared.sum(), squared

    # the values 0..7 arranged as a 2x4 float32 tensor
    t = ep.arange(dummy, 8).float32().reshape((2, 4))

    value_aux_and_grad = ep.value_and_grad_fn(
        dummy, sum_of_squares_with_aux, has_aux=True)
    value, squares, grad = value_aux_and_grad(t)

    # 0^2 + 1^2 + ... + 7^2 == 140, and d/dt sum(t^2) == 2t
    assert value.item() == 140
    assert (squares == t.square()).all()
    assert (grad == 2 * t).all()
示例#4
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Criterion, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Iteratively move inputs towards the closest candidate class.

        In every step the loss and its input gradient are evaluated for each
        candidate index ``k``, turned into distances by
        ``self.get_distances``, and the candidate with the smallest distance
        is selected per sample.  The step returned by
        ``self.get_perturbations`` is accumulated, scaled by
        ``1 + self.overshoot`` and applied to the original inputs.  Samples
        the criterion already accepts are left unchanged.

        Args:
            model: Callable returning logits; ``model.bounds`` provides the
                clipping range.
            inputs: Batch of inputs; the first axis is the batch axis.
            criterion: A criterion, or a value ``get_criterion`` converts
                into one.
            early_stop: Accepted but not used by this method.
            **kwargs: Accepted and discarded without validation.

        Returns:
            The perturbed inputs, clipped to the model bounds and converted
            back to the type of ``inputs``.

        Raises:
            ValueError: If ``self.candidates`` is set and fewer than two
                candidate classes remain.
        """
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        verify_input_bounds(x, model)
        criterion = get_criterion(criterion)
        lower, upper = model.bounds

        logits = model(x)
        # class indices per sample: argsort along the class axis, reversed
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if self.candidates is not None:
            candidates = min(self.candidates, logits.shape[-1])
            if not candidates >= 2:
                raise ValueError(  # pragma: no cover
                    f"expected the model output to have atleast 2 classes, got {logits.shape[-1]}"
                )
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]
        else:
            candidates = logits.shape[-1]  # pragma: no cover

        N = len(x)
        rows = range(N)

        loss_aux_and_grad = ep.value_and_grad_fn(
            x, self._get_loss_fn(model, classes), has_aux=True
        )

        x0 = x
        p_total = ep.zeros_like(x)
        for _ in range(self.steps):
            # evaluate k = 1 first; its logits tell us whether we are done
            first = loss_aux_and_grad(x, 1)
            _value, (_losses, logits), _grad = first

            is_adv = criterion(x, logits)
            if is_adv.all():
                break

            # Evaluate the remaining candidates as well.  Every call repeats
            # the forward pass because only the backward pass cannot be
            # repeated on its own in eagerpy.
            evaluations = [first]
            evaluations.extend(
                loss_aux_and_grad(x, k) for k in range(2, candidates)
            )

            # keep per-sample losses and gradients; the logits are dropped
            losses = ep.stack([lo for _, (lo, _), _ in evaluations], axis=1)
            grads = ep.stack([g for _, _, g in evaluations], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # distance estimate for every (sample, candidate) pair
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # pick the closest candidate for every sample
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N,)
            assert losses.shape == (N,)
            assert grads.shape == x0.shape

            # small offset for numerical stability before computing the step
            distances = distances + 1e-4
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            p_total += p_step

            # samples that are already adversarial keep their current value
            overshot = x0 + (1.0 + self.overshoot) * p_total
            x = ep.where(atleast_kd(is_adv, x.ndim), x, overshot)
            x = ep.clip(x, lower, upper)

        return restore_type(x)
示例#5
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        target_classes=None,
        binary_search_steps=9,
        max_iterations=10000,
        confidence=0,
        initial_learning_rate=1e-2,
        regularization=1e-2,
        initial_const=1e-3,
        abort_early=True,
        decision_rule="EN",
    ):
        """Search for adversarials with an accelerated proximal-gradient loop.

        An outer search adapts a per-sample constant ``consts`` that weights
        the classification term of the loss against the squared L2 distance
        to the original inputs.  For every setting of ``consts`` an inner
        loop alternates a gradient step at the look-ahead point ``y_k``, the
        projection ``project_shrinkage_thresholding`` and a momentum update
        of ``y_k``.

        Args:
            inputs: Batch of original inputs (first axis is the batch axis).
            labels: Labels of shape ``(N,)``; ignored when
                ``target_classes`` is given.
            target_classes: Optional target classes of shape ``(N,)``; when
                given the attack is targeted.
            binary_search_steps: Number of outer rounds adapting ``consts``.
            max_iterations: Maximum number of inner iterations per round.
            confidence: Margin added to the logit difference in the loss and
                forwarded to the adversarial check.
            initial_learning_rate: Step size at iteration 0; decays with a
                square-root schedule to 0 at ``max_iterations``.
            regularization: Forwarded to ``project_shrinkage_thresholding``
                and ``apply_decision_rule``.
            initial_const: Initial value of every entry of ``consts``.
            abort_early: If true, the inner loop stops when the loss has not
                dropped below ``0.9999`` times its value at the previous
                check (checked every ``ceil(max_iterations / 10)``
                iterations).
            decision_rule: ``"EN"`` or ``"L1"``; forwarded to
                ``apply_decision_rule``.

        Returns:
            The native tensor of the best adversarials tracked by
            ``apply_decision_rule``.  The tracker starts at zeros; presumably
            entries stay zero for samples where nothing adversarial was found
            (depends on ``apply_decision_rule`` - confirm).
        """
        x_0 = ep.astensor(inputs)
        N = len(x_0)

        assert decision_rule in ("EN", "L1")

        targeted = target_classes is not None
        if targeted:
            labels = None
            target_classes = ep.astensor(target_classes)
            assert target_classes.shape == (N, )
            is_adv = partial(targeted_is_adv,
                             target_classes=target_classes,
                             confidence=confidence)
        else:
            labels = ep.astensor(labels)
            assert labels.shape == (N, )
            is_adv = partial(untargeted_is_adv,
                             labels=labels,
                             confidence=confidence)

        min_, max_ = self.model.bounds()

        rows = np.arange(N)

        def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> ep.Tensor:
            # returns (scalar loss, (y_k, logits)); the second element is the
            # auxiliary output handed back by value_and_grad_fn
            assert y_k.shape == x_0.shape
            assert consts.shape == (N, )

            logits = ep.astensor(self.model.forward(y_k.tensor))

            if targeted:
                c_minimize = best_other_classes(logits, target_classes)
                c_maximize = target_classes
            else:
                c_minimize = labels
                c_maximize = best_other_classes(logits, labels)

            is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert is_adv_loss.shape == (N, )
            is_adv_loss = is_adv_loss + confidence
            is_adv_loss = ep.maximum(0, is_adv_loss)
            is_adv_loss = is_adv_loss * consts

            squared_norms = flatten(y_k - x_0).square().sum(axis=-1)
            loss = is_adv_loss.sum() + squared_norms.sum()
            return loss, (y_k, logits)

        loss_aux_and_grad = ep.value_and_grad_fn(x_0, loss_fun, has_aux=True)

        consts = initial_const * np.ones((N, ))
        lower_bounds = np.zeros((N, ))
        upper_bounds = np.inf * np.ones((N, ))

        best_advs = ep.zeros_like(x_0)
        best_advs_norms = ep.ones(x_0, (N, )) * np.inf

        # the binary search searches for the smallest consts that produce adversarials
        for binary_search_step in range(binary_search_steps):
            if (binary_search_step == binary_search_steps - 1
                    and binary_search_steps >= 10):
                # in the last iteration, repeat the search once
                consts = np.minimum(upper_bounds, 1e10)

            # restart the iterates from the original inputs
            # TODO: rewrite this once eagerpy supports .copy()
            x_k = x_0  # ep.zeros_like(x_0) + x_0
            y_k = x_0  # ep.zeros_like(x_0) + x_0

            found_advs = np.full(
                (N, ), fill_value=False)  # found adv with the current consts
            loss_at_previous_check = np.inf

            consts_ = ep.from_numpy(x_0, consts.astype(np.float32))

            for iteration in range(max_iterations):
                # square-root learning rate decay
                learning_rate = (initial_learning_rate *
                                 (1.0 - iteration / max_iterations)**0.5)

                # The gradient must be taken at the look-ahead point y_k,
                # because the proximal step below starts from y_k.
                loss, (_, logits), gradient = loss_aux_and_grad(y_k, consts_)

                x_k_old = x_k
                x_k = project_shrinkage_thresholding(
                    y_k - learning_rate * gradient, x_0, regularization, min_,
                    max_)
                # momentum update: extrapolate along the last step, scaled by
                # iteration / (iteration + 3)
                y_k = x_k + iteration / (iteration + 3) * (x_k - x_k_old)

                if abort_early and iteration % (np.ceil(
                        max_iterations / 10)) == 0:
                    # after each tenth of the iterations, check progress
                    if not (loss <= 0.9999 * loss_at_previous_check):
                        break  # stop optimizing if there has been no progress
                    loss_at_previous_check = loss

                found_advs_iter = is_adv(logits)

                best_advs, best_advs_norms = apply_decision_rule(
                    decision_rule,
                    regularization,
                    best_advs,
                    best_advs_norms,
                    x_k,
                    x_0,
                    found_advs_iter,
                )

                found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

            # successful consts become the new upper bound, failed ones the
            # new lower bound
            upper_bounds = np.where(found_advs, consts, upper_bounds)
            lower_bounds = np.where(found_advs, lower_bounds, consts)

            # grow consts tenfold until an upper bound exists, then bisect
            consts_exponential_search = consts * 10
            consts_binary_search = (lower_bounds + upper_bounds) / 2
            consts = np.where(np.isinf(upper_bounds),
                              consts_exponential_search, consts_binary_search)

        return best_advs.tensor
示例#6
0
    def __call__(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
    ) -> T:
        """Search for adversarials with an accelerated proximal-gradient loop.

        An outer search of ``self.binary_search_steps`` rounds adapts a
        per-sample constant weighting the classification term of the loss
        against the squared L2 distance to the inputs.  Each round runs up to
        ``self.steps`` iterations consisting of a gradient step at the
        look-ahead point, ``project_shrinkage_thresholding`` and a momentum
        update.

        Args:
            model: Callable returning logits; ``model.bounds`` gives the
                valid input range.
            inputs: Batch of inputs; the first axis is the batch axis.
            criterion: ``Misclassification``, ``TargetedMisclassification``
                or a value ``get_criterion`` converts into one of them.

        Returns:
            The best adversarials tracked by ``apply_decision_rule``,
            converted back to the type of ``inputs``.

        Raises:
            ValueError: If the criterion is unsupported or its classes do not
                have shape ``(N,)``.
        """
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted, classes = False, criterion_.labels
            change_classes_logits = self.confidence
        elif isinstance(criterion_, TargetedMisclassification):
            targeted, classes = True, criterion_.target_classes
            change_classes_logits = -self.confidence
        else:
            raise ValueError("unsupported criterion")

        def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            # shift the logit of the label / target class by the confidence
            # before asking the criterion
            if change_classes_logits != 0:
                logits += ep.onehot_like(logits, classes, value=change_classes_logits)
            return criterion_(perturbed, logits)

        if classes.shape != (N,):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}"
            )

        lower, upper = model.bounds
        rows = range(N)

        def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            # returns (scalar loss, logits at y_k)
            assert y_k.shape == x.shape
            assert consts.shape == (N,)

            logits = model(y_k)

            other = best_other_classes(logits, classes)
            if targeted:
                c_minimize, c_maximize = other, classes
            else:
                c_minimize, c_maximize = classes, other

            margin = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert margin.shape == (N,)

            # hinge on (margin + confidence), weighted per sample
            weighted = ep.maximum(0, margin + self.confidence) * consts

            squared_norms = flatten(y_k - x).square().sum(axis=-1)
            return weighted.sum() + squared_norms.sum(), logits

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        consts = self.initial_const * ep.ones(x, (N,))
        lower_bounds = ep.zeros(x, (N,))
        upper_bounds = ep.inf * ep.ones(x, (N,))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.ones(x, (N,)) * ep.inf

        # the outer search looks for the smallest consts producing adversarials
        for binary_search_step in range(self.binary_search_steps):
            is_last_round = binary_search_step == self.binary_search_steps - 1
            if is_last_round and self.binary_search_steps >= 10:
                # in the last round, repeat the search once
                consts = ep.minimum(upper_bounds, 1e10)

            # restart both iterates from the original inputs
            x_k = x
            y_k = x

            # tracks whether an adversarial was found with the current consts
            found_advs = ep.full(x, (N,), value=False).bool()
            loss_at_previous_check = ep.ones(x, (1,)) * ep.inf

            for iteration in range(self.steps):
                # square-root decay of the step size
                stepsize = self.initial_stepsize * (1.0 - iteration / self.steps) ** 0.5

                loss, logits, gradient = loss_aux_and_grad(y_k, consts)

                x_k_old = x_k
                x_k = project_shrinkage_thresholding(
                    y_k - stepsize * gradient, x, self.regularization, lower, upper
                )
                # momentum: extrapolate along the last step
                momentum = iteration / (iteration + 3.0)
                y_k = x_k + momentum * (x_k - x_k_old)

                is_checkpoint = (
                    self.abort_early
                    and iteration % (math.ceil(self.steps / 10)) == 0
                )
                if is_checkpoint:
                    # after each tenth of the iterations, check progress
                    # TODO: loss is a scalar ep tensor; is this the best way
                    #  to implement the condition?
                    made_progress = ep.all(loss <= 0.9999 * loss_at_previous_check)
                    if not made_progress:
                        break  # no progress since the last check
                    loss_at_previous_check = loss

                found_advs_iter = is_adversarial(x_k, logits)

                best_advs, best_advs_norms = apply_decision_rule(
                    self.decision_rule,
                    self.regularization,
                    best_advs,
                    best_advs_norms,
                    x_k,
                    x,
                    found_advs_iter,
                )

                found_advs = ep.logical_or(found_advs, found_advs_iter)

            # successful consts become the upper bound, failed ones the lower
            upper_bounds = ep.where(found_advs, consts, upper_bounds)
            lower_bounds = ep.where(found_advs, lower_bounds, consts)

            # grow tenfold until an upper bound exists, then bisect
            grown = consts * 10
            bisected = (lower_bounds + upper_bounds) / 2
            consts = ep.where(ep.isinf(upper_bounds), grown, bisected)

        return restore_type(best_advs)
示例#7
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for small-L2 adversarials by decoupling direction and norm.

        For ``self.steps`` iterations the perturbation is moved along the
        normalized loss gradient and then projected onto an L2 ball whose
        radius ``epsilon`` shrinks by ``1 - self.gamma`` for samples that are
        currently adversarial and grows by ``1 + self.gamma`` otherwise.
        The smallest adversarial perturbation seen per sample is kept.

        Args:
            model: Callable returning logits; ``model.bounds`` gives the
                valid input range.
            inputs: Batch of inputs; the first axis is the batch axis.
            criterion: ``Misclassification``, ``TargetedMisclassification``
                or a value ``get_criterion`` converts into one of them.
            early_stop: Accepted but not used by this method.
            **kwargs: Must be empty; checked by ``raise_if_kwargs``.

        Returns:
            ``inputs`` plus the best perturbation found, converted back to the
            type of ``inputs``.  Samples for which no adversarial was found
            keep the initial (zero) perturbation.

        Raises:
            ValueError: If the criterion is unsupported or its classes do not
                have shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        # end points of the cosine step-size schedule
        max_stepsize = 1.0
        min_stepsize = 0.01
        min_, max_ = model.bounds

        def loss_fn(inputs: ep.Tensor,
                    labels: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            # returns (signed summed cross-entropy, logits); the sign is
            # flipped for targeted attacks so that following the gradient
            # lowers the cross-entropy w.r.t. the target classes
            logits = model(inputs)

            sign = -1.0 if targeted else 1.0
            loss = sign * ep.crossentropy(logits, labels).sum()

            return loss, logits

        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        delta = ep.zeros_like(x)

        epsilon = self.init_epsilon * ep.ones(x, N)
        # largest L2 norm any perturbation can have inside the bounds
        worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

        best_l2 = worst_norm
        best_delta = delta

        for i in range(self.steps):
            # Cosine annealing of the step size from max_stepsize to
            # min_stepsize.  The schedule is computed from the constant end
            # points; feeding the previous step size back into the formula
            # would compound the decay and collapse the step size early.
            stepsize = (min_stepsize + (max_stepsize - min_stepsize) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)

            x_adv = x + delta

            _, logits, gradients = grad_and_logits(x_adv, classes)
            gradients = normalize_gradient_l2_norms(gradients)
            is_adversarial = criterion_(x_adv, logits)

            # remember the smallest adversarial perturbation per sample
            l2 = ep.norms.l2(flatten(delta), axis=-1)
            is_smaller = l2 <= best_l2

            is_both = ep.logical_and(is_adversarial, is_smaller)
            best_l2 = ep.where(is_both, l2, best_l2)

            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # do step
            delta = delta + stepsize * gradients

            # shrink the ball for adversarial samples, grow it otherwise
            epsilon = epsilon * ep.where(is_adversarial, 1.0 - self.gamma,
                                         1.0 + self.gamma)
            epsilon = ep.minimum(epsilon, worst_norm)

            # project to epsilon ball
            delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1),
                                x.ndim)

            # clip to valid bounds
            delta = ep.clip(x + delta, min_, max_) - x

        x_adv = x + best_delta

        return restore_type(x_adv)
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        """Search for small-L2 adversarials by optimizing in attack space.

        The perturbation ``delta`` lives in the space produced by
        ``_to_attack_space`` and is optimized with ``AdamOptimizer``.  The
        loss is a per-sample weighted hinge on the logit margin plus the
        squared L2 distance to the (reconstructed) inputs.  An outer search
        of ``self.binary_search_steps`` rounds adapts the per-sample weights.

        Args:
            model: Callable returning logits; ``model.bounds`` is forwarded
                to the space conversion helpers.
            inputs: Batch of inputs; the first axis is the batch axis.
            criterion: ``Misclassification``, ``TargetedMisclassification``
                or a value ``get_criterion`` converts into one of them.
            early_stop: Accepted but not used by this method.
            **kwargs: Must be empty; checked by ``raise_if_kwargs``.

        Returns:
            The closest adversarials found (zeros where none was found),
            converted back to the type of ``inputs``.

        Raises:
            ValueError: If the criterion is unsupported or its classes do not
                have shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted, classes = False, criterion_.labels
            change_classes_logits = self.confidence
        elif isinstance(criterion_, TargetedMisclassification):
            targeted, classes = True, criterion_.target_classes
            change_classes_logits = -self.confidence
        else:
            raise ValueError("unsupported criterion")

        def is_adversarial(perturbed: ep.Tensor,
                           logits: ep.Tensor) -> ep.Tensor:
            # shift the logit of the label / target class by the confidence
            # before asking the criterion
            if change_classes_logits != 0:
                logits += ep.onehot_like(logits,
                                         classes,
                                         value=change_classes_logits)
            return criterion_(perturbed, logits)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        bounds = model.bounds
        to_attack_space = partial(_to_attack_space, bounds=bounds)
        to_model_space = partial(_to_model_space, bounds=bounds)

        x_attack = to_attack_space(x)
        # round trip through attack space; distances are measured against this
        reconstructed_x = to_model_space(x_attack)

        rows = range(N)

        def loss_fun(
            delta: ep.Tensor, consts: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
            # returns (scalar loss, (perturbed inputs in model space, logits))
            assert delta.shape == x_attack.shape
            assert consts.shape == (N, )

            x_model = to_model_space(x_attack + delta)
            logits = model(x_model)

            other = best_other_classes(logits, classes)
            if targeted:
                c_minimize, c_maximize = other, classes
            else:
                c_minimize, c_maximize = classes, other

            margin = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert margin.shape == (N, )

            # hinge on (margin + confidence), weighted per sample
            weighted = ep.maximum(0, margin + self.confidence) * consts

            squared_norms = flatten(x_model - reconstructed_x).square().sum(
                axis=-1)
            total = weighted.sum() + squared_norms.sum()
            return total, (x_model, logits)

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        consts = self.initial_const * np.ones((N, ))
        lower_bounds = np.zeros((N, ))
        upper_bounds = np.inf * np.ones((N, ))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.full(x, (N, ), ep.inf)

        # the outer search looks for the smallest consts producing adversarials
        for binary_search_step in range(self.binary_search_steps):
            is_last_round = binary_search_step == self.binary_search_steps - 1
            if is_last_round and self.binary_search_steps >= 10:
                # in the last round, repeat the search once
                consts = np.minimum(upper_bounds, 1e10)

            # fresh perturbation and optimizer state for this round
            delta = ep.zeros_like(x_attack)
            optimizer = AdamOptimizer(delta)

            # tracks whether adv with the current consts was found
            found_advs = np.full((N, ), fill_value=False)
            loss_at_previous_check = np.inf

            consts_ = ep.from_numpy(x, consts.astype(np.float32))

            for step in range(self.steps):
                loss, aux, gradient = loss_aux_and_grad(delta, consts_)
                perturbed, logits = aux
                delta += optimizer(gradient, self.stepsize)

                if self.abort_early and step % (np.ceil(self.steps / 10)) == 0:
                    # after each tenth of the overall steps, check progress
                    if not (loss <= 0.9999 * loss_at_previous_check):
                        break  # no progress since the last check
                    loss_at_previous_check = loss

                found_advs_iter = is_adversarial(perturbed, logits)
                found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

                # keep adversarials that are closer than the best so far
                norms = flatten(perturbed - x).norms.l2(axis=-1)
                is_closer = norms < best_advs_norms
                improved = ep.logical_and(is_closer, found_advs_iter)

                improved_kd = atleast_kd(improved, best_advs.ndim)
                best_advs = ep.where(improved_kd, perturbed, best_advs)
                best_advs_norms = ep.where(improved, norms, best_advs_norms)

            # successful consts become the upper bound, failed ones the lower
            upper_bounds = np.where(found_advs, consts, upper_bounds)
            lower_bounds = np.where(found_advs, lower_bounds, consts)

            # grow tenfold until an upper bound exists, then bisect
            grown = consts * 10
            bisected = (lower_bounds + upper_bounds) / 2
            consts = np.where(np.isinf(upper_bounds), grown, bisected)

        return restore_type(best_advs)
示例#9
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        p,
        candidates=10,
        overshoot=0.02,
        steps=50,
        loss="logits",
    ):
        """Iteratively move inputs towards the closest candidate class.

        Parameters
        ----------
        inputs
            Batch of inputs; the first axis is the batch axis.
        labels
            Labels compared against the arg-max of the logits to decide
            whether a sample is already adversarial.
        p : int or float
            Lp-norm that should be minimized, must be 2 or np.inf.
        candidates : int
            Limit on the number of the most likely classes that should
            be considered. A small value is usually sufficient and much
            faster.
        overshoot : float
            The accumulated perturbation is scaled by ``1 + overshoot``
            before it is applied.
        steps : int
            Maximum number of steps to perform.
        loss : str
            Either ``"logits"`` or ``"crossentropy"``; selects how the
            difference between a candidate class and the top class is
            measured.

        Returns
        -------
        The native tensor of the perturbed inputs, clipped to the model
        bounds.

        Raises
        ------
        ValueError
            If ``p`` is outside ``[1, inf]`` or ``loss`` is unknown.
        NotImplementedError
            If ``p`` is neither 2 nor ``np.inf``.
        """

        if not (1 <= p <= np.inf):
            raise ValueError
        if p not in [2, np.inf]:
            raise NotImplementedError

        lower, upper = self.model.bounds()

        inputs = ep.astensor(inputs)
        labels = ep.astensor(labels)

        N = len(inputs)

        logits = self.model.forward(inputs)
        candidates = min(candidates, logits.shape[-1])
        # class indices per sample: argsort along the class axis, reversed
        classes = logits.argsort(axis=-1).flip(axis=-1)
        if candidates:
            assert candidates >= 2
            logging.info(f"Only testing the top-{candidates} classes")
            classes = classes[:, :candidates]

        # first-ranked class of every sample
        i0 = classes[:, 0]
        rows = ep.arange(inputs, N)

        if loss == "logits":

            def loss_fun(x: ep.Tensor, k: int) -> ep.Tensor:
                # logit of the k-th ranked class minus logit of the top class
                logits = self.model.forward(x)
                ik = classes[:, k]
                diff = logits[rows, ik] - logits[rows, i0]
                return diff.sum(), (diff, logits)

        elif loss == "crossentropy":

            def loss_fun(x: ep.Tensor, k: int) -> ep.Tensor:
                # same difference, measured with negated cross-entropies
                logits = self.model.forward(x)
                ik = classes[:, k]
                l0 = -ep.crossentropy(logits, i0)
                lk = -ep.crossentropy(logits, ik)
                diff = lk - l0
                return diff.sum(), (diff, logits)

        else:
            raise ValueError(
                f"expected loss to be 'logits' or 'crossentropy', got '{loss}'"
            )

        loss_aux_and_grad = ep.value_and_grad_fn(inputs,
                                                 loss_fun,
                                                 has_aux=True)

        x = x0 = inputs
        p_total = ep.zeros_like(x)
        for _step in range(steps):
            # evaluate k = 1 first; its logits tell us whether we are done
            first = loss_aux_and_grad(x, 1)
            _value, (_losses, logits), _grad = first
            is_adv = logits.argmax(axis=-1) != labels
            if is_adv.all():
                break

            # Evaluate the remaining candidates as well.  Every call repeats
            # the forward pass because only the backward pass cannot be
            # repeated on its own in eagerpy.
            evaluations = [first]
            evaluations.extend(
                loss_aux_and_grad(x, k) for k in range(2, candidates))

            # keep per-sample losses and gradients; the logits are dropped
            losses = ep.stack([lo for _, (lo, _), _ in evaluations], axis=1)
            grads = ep.stack([g for _, _, g in evaluations], axis=1)
            assert losses.shape == (N, candidates - 1)
            assert grads.shape == (N, candidates - 1) + x0.shape[1:]

            # distance estimate for every (sample, candidate) pair
            distances = self.get_distances(losses, grads)
            assert distances.shape == (N, candidates - 1)

            # pick the closest candidate for every sample
            best = distances.argmin(axis=1)
            distances = distances[rows, best]
            losses = losses[rows, best]
            grads = grads[rows, best]
            assert distances.shape == (N, )
            assert losses.shape == (N, )
            assert grads.shape == x0.shape

            # small offset for numerical stability before computing the step
            distances = distances + 1e-4
            p_step = self.get_perturbations(distances, grads)
            assert p_step.shape == x0.shape

            p_total += p_step

            # samples that are already adversarial keep their current value
            overshot = x0 + (1.0 + overshoot) * p_total
            x = ep.where(atleast_kd(is_adv, x.ndim), x, overshot)
            x = ep.clip(x, lower, upper)

        return x.tensor
示例#10
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        starting_points: Optional[ep.Tensor] = None,
        early_stop: Optional[float] = None,
        **kwargs: Any,
    ) -> T:
        raise_if_kwargs(kwargs)
        criterion_ = get_criterion(criterion)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        def loss_fn(
            inputs: ep.Tensor, labels: ep.Tensor
        ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:

            logits = model(inputs)

            if targeted:
                c_minimize = best_other_classes(logits, labels)
                c_maximize = labels  # target_classes
            else:
                c_minimize = labels  # labels
                c_maximize = best_other_classes(logits, labels)

            loss = logits[rows, c_minimize] - logits[rows, c_maximize]

            return -loss.sum(), (logits, loss)

        x, restore_type = ep.astensor_(inputs)
        del inputs, criterion, kwargs
        N = len(x)

        # start from initialization points/attack
        if starting_points is not None:
            x1 = starting_points
        else:
            if self.init_attack is not None:
                x1 = self.init_attack.run(model, x, criterion_)
            else:
                x1 = None

        # if initial points or initialization attacks are provided,
        #   search for the boundary
        if x1 is not None:
            is_adv = get_is_adversarial(criterion_, model)
            assert is_adv(x1).all()
            lower_bound = ep.zeros(x, shape=(N, ))
            upper_bound = ep.ones(x, shape=(N, ))
            for _ in range(self.binary_search_steps):
                epsilons = (lower_bound + upper_bound) / 2
                mid_points = self.mid_points(x, x1, epsilons, model.bounds)
                is_advs = is_adv(mid_points)
                lower_bound = ep.where(is_advs, lower_bound, epsilons)
                upper_bound = ep.where(is_advs, epsilons, upper_bound)
            starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
            delta = starting_points - x
        else:
            # start from x0
            delta = ep.zeros_like(x)

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        min_, max_ = model.bounds
        rows = range(N)
        grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

        if self.p != 0:
            epsilon = ep.inf * ep.ones(x, len(x))
        else:
            epsilon = ep.ones(x, len(x)) if x1 is None \
                else ep.norms.l0(flatten(delta), axis=-1)
        if self.p != 0:
            worst_norm = ep.norms.lp(flatten(ep.maximum(x - min_, max_ - x)),
                                     p=self.p,
                                     axis=-1)
        else:
            worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

        best_lp = worst_norm
        best_delta = delta
        adv_found = ep.zeros(x, len(x)).bool()

        for i in range(self.steps):
            # perform cosine annealing of learning rates
            stepsize = (self.min_stepsize +
                        (self.max_stepsize - self.min_stepsize) *
                        (1 + math.cos(math.pi * i / self.steps)) / 2)
            gamma = (0.001 + (self.gamma - 0.001) *
                     (1 + math.cos(math.pi * (i / self.steps))) / 2)

            x_adv = x + delta

            loss, (logits,
                   loss_batch), gradients = grad_and_logits(x_adv, classes)
            is_adversarial = criterion_(x_adv, logits)

            lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
            is_smaller = lp <= best_lp
            is_both = ep.logical_and(is_adversarial, is_smaller)
            adv_found = ep.logical_or(adv_found, is_adversarial)
            best_lp = ep.where(is_both, lp, best_lp)
            best_delta = ep.where(atleast_kd(is_both, x.ndim), delta,
                                  best_delta)

            # update epsilon
            if self.p != 0:
                distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                    flatten(gradients), p=self.dual, axis=-1)
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        epsilon * (1 - gamma),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.where(
                        adv_found, epsilon * (1 + gamma),
                        ep.norms.lp(flatten(delta), p=self.p, axis=-1) +
                        distance_to_boundary))
            else:
                epsilon = ep.where(
                    is_adversarial,
                    ep.minimum(
                        ep.minimum(epsilon - 1,
                                   (epsilon * (1 - gamma)).astype(int).astype(
                                       epsilon.dtype)),
                        ep.norms.lp(flatten(best_delta), p=self.p, axis=-1)),
                    ep.maximum(epsilon + 1,
                               (epsilon * (1 + gamma)).astype(int).astype(
                                   epsilon.dtype)))
                epsilon = ep.maximum(0, epsilon).astype(epsilon.dtype)

            # clip epsilon
            epsilon = ep.minimum(epsilon, worst_norm)

            # computes normalized gradient update
            grad_ = self.normalize(gradients, x=x,
                                   bounds=model.bounds) * stepsize

            # do step
            delta = delta + grad_

            # project according to the given norm
            delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

            # clip to valid bounds
            delta = ep.clip(x + delta, *model.bounds) - x

        x_adv = x + best_delta
        return restore_type(x_adv)
示例#11
0
    def __call__(
        self,
        inputs,
        labels,
        *,
        target_classes=None,
        binary_search_steps=9,
        max_iterations=10000,
        confidence=0,
        learning_rate=1e-2,
        initial_const=1e-3,
        abort_early=True,
    ):
        """Search for adversarial examples that are close to ``inputs`` in L2.

        A perturbation ``delta`` is optimized in the space produced by
        ``_to_attack_space``. The minimized loss is, summed over the batch,
        ``const * max(0, logit_margin + confidence)`` plus the squared L2
        distance to the input. The per-sample constants are adapted by an
        outer search: a constant is multiplied by 10 as long as no
        adversarial has been found for that sample with any constant so far,
        and is bisected between its lower and upper bound afterwards.

        Args:
            inputs: Batch of inputs; converted with ``ep.astensor``. Its
                length ``N`` is the batch size.
            labels: Labels of shape ``(N,)``. Ignored (overwritten with
                ``None``) when ``target_classes`` is given.
            target_classes: Optional classes of shape ``(N,)``. When given,
                the attack is targeted.
            binary_search_steps: Number of outer iterations that adapt the
                constants. If it is at least 10, the last iteration uses
                ``min(upper_bound, 1e10)`` as constants.
            max_iterations: Maximum number of optimizer steps per outer
                iteration.
            confidence: Value added to the logit margin inside the loss; it
                is also forwarded to the adversarial check.
            learning_rate: Forwarded to the optimizer on every step.
            initial_const: Initial value of every per-sample constant.
            abort_early: If true, the loss is inspected every
                ``ceil(max_iterations / 10)`` steps and the inner loop stops
                unless the loss dropped to at most 99.99% of its value at
                the previous inspection.

        Returns:
            The native tensor (``.tensor``) holding, per sample, the
            adversarial with the smallest L2 distance to the input that was
            encountered. Samples for which no adversarial was found stay
            all-zero.

        Raises:
            AssertionError: If ``labels`` (untargeted) or ``target_classes``
                (targeted) does not have shape ``(N,)``.
        """
        x = ep.astensor(inputs)
        N = len(x)

        targeted = target_classes is not None
        if targeted:
            labels = None
            target_classes = ep.astensor(target_classes)
            assert target_classes.shape == (N, )
            is_adv = partial(targeted_is_adv,
                             target_classes=target_classes,
                             confidence=confidence)
        else:
            labels = ep.astensor(labels)
            assert labels.shape == (N, )
            is_adv = partial(untargeted_is_adv,
                             labels=labels,
                             confidence=confidence)

        bounds = self.model.bounds()
        to_attack_space = partial(_to_attack_space, bounds=bounds)
        to_model_space = partial(_to_model_space, bounds=bounds)

        x_attack = to_attack_space(x)
        # The distance term of the loss is measured against the round-tripped
        # input rather than against x itself (presumably so that delta == 0
        # gives exactly zero distance -- confirm against the helpers).
        reconstructed_x = to_model_space(x_attack)

        rows = np.arange(N)

        def loss_fun(delta: ep.Tensor, consts: ep.Tensor) -> ep.Tensor:
            assert delta.shape == x_attack.shape
            assert consts.shape == (N, )

            perturbed_x = to_model_space(x_attack + delta)
            logits = ep.astensor(self.model.forward(perturbed_x.tensor))

            if targeted:
                c_minimize = best_other_classes(logits, target_classes)
                c_maximize = target_classes
            else:
                c_minimize = labels
                c_maximize = best_other_classes(logits, labels)

            # hinge on the logit margin, weighted by the per-sample constant
            is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert is_adv_loss.shape == (N, )
            is_adv_loss = ep.maximum(0, is_adv_loss + confidence) * consts

            squared_norms = flatten(perturbed_x -
                                    reconstructed_x).square().sum(axis=-1)
            loss = is_adv_loss.sum() + squared_norms.sum()
            return loss, (perturbed_x, logits)

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        consts = initial_const * np.ones((N, ))
        lower_bounds = np.zeros((N, ))
        upper_bounds = np.inf * np.ones((N, ))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.ones(x, (N, )) * np.inf

        # progress is inspected once per tenth of the inner iterations
        check_interval = np.ceil(max_iterations / 10)

        # the outer search looks for the smallest consts that still produce
        # adversarials
        for binary_search_step in range(binary_search_steps):
            is_last_step = binary_search_step == binary_search_steps - 1
            if is_last_step and binary_search_steps >= 10:
                # in the last iteration, repeat the search once with the
                # (capped) upper bounds
                consts = np.minimum(upper_bounds, 1e10)

            # fresh perturbation and optimizer state for the current consts
            delta = ep.zeros_like(x_attack)
            optimizer = AdamOptimizer(delta)

            # found an adversarial with the current consts
            found_advs = np.full((N, ), fill_value=False)
            loss_at_previous_check = np.inf

            consts_ = ep.from_numpy(x, consts.astype(np.float32))

            for iteration in range(max_iterations):
                loss, (perturbed,
                       logits), gradient = loss_aux_and_grad(delta, consts_)
                delta += optimizer(gradient, learning_rate)

                if abort_early and iteration % check_interval == 0:
                    # stop the optimizer if there has been no progress since
                    # the previous inspection
                    if not (loss <= 0.9999 * loss_at_previous_check):
                        break
                    loss_at_previous_check = loss

                found_advs_iter = is_adv(logits)
                found_advs = np.logical_or(found_advs, found_advs_iter.numpy())

                norms = flatten(perturbed - x).square().sum(axis=-1).sqrt()
                closer = norms < best_advs_norms
                new_best = ep.logical_and(closer, found_advs_iter)

                # Select instead of blending with a 0/1 mask: best_advs_norms
                # starts at inf, and 0 * inf is NaN, which made every later
                # "norms < best" comparison False, so only the first
                # adversarial found was ever kept.
                best_advs = ep.where(atleast_kd(new_best, best_advs.ndim),
                                     perturbed, best_advs)
                best_advs_norms = ep.where(new_best, norms, best_advs_norms)

            upper_bounds = np.where(found_advs, consts, upper_bounds)
            lower_bounds = np.where(found_advs, lower_bounds, consts)

            # grow exponentially until an upper bound exists, then bisect
            consts_exponential_search = consts * 10
            consts_binary_search = (lower_bounds + upper_bounds) / 2
            consts = np.where(np.isinf(upper_bounds),
                              consts_exponential_search, consts_binary_search)

        return best_advs.tensor
示例#12
0
    def run(
        self,
        model: Model,
        inputs: T,
        criterion: Union[Misclassification, TargetedMisclassification, T],
        *,
        early_stop: Optional[float] = None,
        filenames=None,
        **kwargs: Any,
    ) -> Tuple[int, str]:
        """Run ``nsgaii.nsgaii`` on every input of the batch, one at a time.

        The watermark image is read from ``./test2.png``. For each input the
        search is given the criterion's class when the model's clean
        prediction equals it, and the model's clean prediction otherwise.
        When ``self.need_show_img`` is set, the first returned candidate is
        scaled by 255, resized to 500x500 and saved as
        ``nsgaii.watermark_dir + '/' + filenames[j]``; one log line per saved
        image is appended to the returned message.

        Args:
            model: Model under attack; called on the whole batch once to get
                the clean predictions and forwarded to ``nsgaii.nsgaii``.
            inputs: Batch of inputs; its length ``N`` is the batch size.
            criterion: ``Misclassification`` (its ``labels`` are used) or
                ``TargetedMisclassification`` (its ``target_classes`` are
                used), or something ``get_criterion`` turns into one of them.
            early_stop: Accepted but not used by this implementation.
            filenames: Indexable with one file name per input. Required
                despite the ``None`` default.
            **kwargs: Must be empty (checked by ``raise_if_kwargs``).

        Returns:
            ``(count, msg)``: the number of inputs for which the search
            returned at least one candidate, and the concatenated log lines
            (empty unless images were saved).

        Raises:
            ValueError: If ``filenames`` is ``None``, the criterion is
                unsupported, or the classes do not have shape ``(N,)``.
        """
        raise_if_kwargs(kwargs)
        if filenames is None:
            # fail early with a clear message instead of a TypeError on the
            # first filenames[j] lookup
            raise ValueError(
                "filenames is required: expected one file name per input")
        x, _ = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs
        N = len(x)
        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N, ):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}")

        # Load the watermark as a contiguous (C, H, W) float32 tensor; the
        # context manager releases the file handle once the pixels are read.
        with Image.open('./test2.png') as image:
            wm_array = np.array(image, dtype=np.float32).transpose([2, 0, 1])
        watermark = torch.from_numpy(np.ascontiguousarray(wm_array))

        logist_clean = model(x).argmax(1)

        one_batch_attack_success = 0
        messages = []
        for j in range(N):  # foreach sample of the batch
            filename = filenames[j]
            correctly_classified = bool(logist_clean[j] == classes[j])
            if correctly_classified:
                attack_label = classes[j]
                suffix = "\n"
            else:
                # the clean prediction already differs from the criterion's
                # class: search against the predicted class and tag the log
                attack_label = logist_clean[j]
                suffix = " pred error\n"

            blocks, alpha, angle = nsgaii.get_init()
            # every sample gets its own copy of the watermark
            attack_success_population = nsgaii.nsgaii(
                model, x[j], attack_label, watermark.clone(), blocks, alpha,
                angle, self.waterMark, filename)

            if len(attack_success_population) > 0:
                one_batch_attack_success += 1

            if not self.need_show_img:
                continue

            adv_dir = nsgaii.watermark_dir
            os.makedirs(adv_dir, exist_ok=True)
            timestamp = str(int(time.time() * 1000))
            if len(attack_success_population) == 0:
                continue

            # only the first candidate is saved; entries are indexed as
            # [2] -> logit info, [3] -> l2 value, [4] -> adversarial tensor
            first = attack_success_population[0]
            logist_population = first[2]
            l2_population = first[3]

            pixels = first[4].raw.cpu().numpy().transpose([1, 2, 0]) * 255
            img = Image.fromarray(pixels.astype('uint8')).convert('RGB')
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
            # alias, which was removed in Pillow 10
            img = img.resize((500, 500), Image.LANCZOS)
            img.save(adv_dir + '/' + filename)

            # !s forces str() so tensor values are rendered exactly as with
            # explicit str(...) concatenation
            messages.append(
                f"{timestamp}_filename_{filename}"
                f"_logist{logist_population!s}_l2={l2_population!s}{suffix}")

        return one_batch_attack_success, "".join(messages)