Example No. 1
    def _loss_fn(self, output, y_onehot, linfdistsq, const):
        # TODO: move this out of the class and make this the default loss_fn
        #   after having targeted tests implemented
        real = (y_onehot * output).sum(dim=1)

        # TODO: make loss modular, write a loss class
        other, label_o = ((1.0 - y_onehot) * output -
                          (y_onehot * TARGET_MULT)).max(1)
        label_0 = F.one_hot(label_o, num_classes=self.num_classes)
        # subtracting y_onehot * TARGET_MULT keeps the true label from being
        # selected by max()

        if self.adaptive_con:
            c = c_con
        else:
            c = self.confidence

        if self.targeted:
            loss1 = clamp(other - real + c, min=0.)
        else:
            loss1 = clamp(real - other + c, min=0.)

        # adaptive loss for evading evidence detector
        if self.adaptive_evi:
            loss1 += clamp(self.evi_train_median - output.logsumexp(dim=1),
                           min=0.)
        if self.adaptive_con:
            l = F.softmax(output, dim=1) * label_0
            loss1 = clamp(self.con_train_median - l.sum(dim=1), min=0.)

        loss2 = (linfdistsq).sum()
        loss1 = torch.sum(const * loss1)
        loss = loss1 + loss2
        return loss
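
The masking trick in the comment above is easier to see with concrete numbers. The following standalone sketch is illustrative only (the value 10000.0 for TARGET_MULT is an assumption); it shows how subtracting y_onehot * TARGET_MULT keeps the true class out of the max:

import torch

TARGET_MULT = 10000.0                              # assumed illustrative value
output = torch.tensor([[2.0, 5.0, 1.0]])           # logits for one sample
y_onehot = torch.tensor([[0.0, 1.0, 0.0]])         # true class is index 1

real = (y_onehot * output).sum(dim=1)              # tensor([5.])
other = ((1.0 - y_onehot) * output
         - y_onehot * TARGET_MULT).max(1)[0]       # tensor([2.]); index 1 is pushed to -10000
margin = torch.clamp(real - other, min=0.)         # untargeted CW-style margin, tensor([3.])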
Example No. 2
    def _loss_fn(self, output, y_onehot, l2distsq, const):
        # TODO: move this out of the class and make this the default loss_fn
        #   after having targeted tests implemented
        real = (y_onehot * output).sum(dim=1)

        # TODO: make loss modular, write a loss class
        other = ((1.0 - y_onehot) * output -
                 (y_onehot * TARGET_MULT)).max(1)[0]
        # subtracting y_onehot * TARGET_MULT keeps the true label from being
        # selected by max()

        if self.targeted:
            loss1 = clamp(other - real + self.confidence, min=0.)
            threshold_loss = clamp(self.threshold - real, min=0.)
            #threshold_loss = clamp(real - self.threshold, min=0.)
        else:
            loss1 = clamp(real - other + self.confidence, min=0.)
            threshold_loss = clamp(self.threshold - other, min=0.)
        loss2 = (l2distsq).sum()
        # const = 0.001
        loss1 = torch.sum(const * loss1)
        threshold_loss = torch.sum(const * threshold_loss)
        # print('const ', const)
        print('dis: {:.2f}, loss1: {:.2f}, threshold_loss: {:.2f}'.format(
            loss2.item(), loss1.item(), threshold_loss.item()))
        loss = loss2 + threshold_loss
        #loss = loss1 + loss2 + threshold_loss
        return loss
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.

        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """
        x, y = self._verify_and_process_inputs(x, y)

        delta = torch.zeros_like(x)
        g = torch.zeros_like(x)

        delta = nn.Parameter(delta)

        for i in range(self.nb_iter):

            if delta.grad is not None:
                delta.grad.detach_()
                delta.grad.zero_()

            imgadv = x + delta
            outputs = self.predict(imgadv)
            loss = self.loss_fn(outputs, y)
            if self.targeted:
                loss = -loss
            loss.backward()

            g = self.decay_factor * g + normalize_by_pnorm(
                delta.grad.data, p=1)
            # according to the paper it should be .sum(), but in their
            #   implementations (both cleverhans and the link from the paper)
            #   it is .mean(), but actually it shouldn't matter
            if self.ord == np.inf:
                delta.data += self.eps_iter * torch.sign(g)
                delta.data = clamp(
                    delta.data, min=-self.eps, max=self.eps)
                delta.data = clamp(
                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
            elif self.ord == 2:
                delta.data += self.eps_iter * normalize_by_pnorm(g, p=2)
                delta.data *= clamp(
                    (self.eps * normalize_by_pnorm(delta.data, p=2)
                        / delta.data),
                    max=1.)
                delta.data = clamp(
                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
            else:
                error = "Only ord = inf and ord = 2 have been implemented"
                raise NotImplementedError(error)

        rval = x + delta.data
        return rval
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size per iteration.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf or 2).
    :param clip_min: (optional float) minimum value per input dimension.
    :param clip_max: (optional float) maximum value per input dimension.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        outputs = predict(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
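
A minimal usage sketch for perturb_iterative as defined above; the toy model, data, and hyperparameters below are placeholders, not taken from the original code:

import numpy as np
import torch
import torch.nn as nn

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))   # toy classifier
x = torch.rand(4, 1, 28, 28)                                  # inputs in [0, 1]
y = torch.randint(0, 10, (4,))

# 40-step Linf PGD with step size 0.01 inside an eps = 0.3 ball
xadv = perturb_iterative(
    x, y, predict=model, nb_iter=40, eps=0.3, eps_iter=0.01,
    loss_fn=nn.CrossEntropyLoss(reduction="sum"),
    minimize=False, ord=np.inf, clip_min=0.0, clip_max=1.0)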
    def _cwl2_loss(self, output, y_onehot):
        real = (y_onehot * output).sum(dim=1)
        other = ((1.0 - y_onehot) * output -
                 (y_onehot * TARGET_MULT)).max(1)[0]
        # subtracting y_onehot * TARGET_MULT keeps the true label from being
        # selected by max()

        if self.targeted:
            loss = clamp(other - real + self.confidence, min=0.)
        else:
            loss = clamp(real - other + self.confidence, min=0.)
        return loss
Example No. 6
    def _fast_iterative_shrinkage_thresholding(self, x, delta):

        zt = self.global_step / (self.global_step + 3)

        upper = clamp(delta - self.beta, max=self.clip_max)
        lower = clamp(delta + self.beta, min=self.clip_min)

        cond1 = ((delta - x) > self.beta).float()
        cond2 = (torch.abs(delta - x) <= self.beta).float()
        cond3 = ((delta - x) < -self.beta).float()

        newimg = (cond1 * upper) + (cond2 * x) + (cond3 * lower)
        adv = newimg + (zt * (newimg - x))
        return adv
Example No. 7
    def _fast_iterative_shrinkage_thresholding(self, x, yy_k, xx_k):

        zt = self.global_step / (self.global_step + 3)

        upper = clamp(yy_k - self.beta, max=self.clip_max)
        lower = clamp(yy_k + self.beta, min=self.clip_min)

        diff = yy_k - x
        cond1 = (diff > self.beta).float()
        cond2 = (torch.abs(diff) <= self.beta).float()
        cond3 = (diff < -self.beta).float()

        xx_k_p_1 = (cond1 * upper) + (cond2 * x) + (cond3 * lower)
        yy_k.data = xx_k_p_1 + (zt * (xx_k_p_1 - xx_k))
        return yy_k, xx_k_p_1
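
Both shrinkage-thresholding examples above implement the same element-wise rule: keep a pixel at x while |y_k - x| <= beta, otherwise shrink y_k by beta toward x and clip to the data range. A standalone sketch of just that rule, with illustrative names and values (the Nesterov momentum term zt is omitted):

import torch

def ista_shrink(x, y_k, beta, clip_min=0.0, clip_max=1.0):
    upper = torch.clamp(y_k - beta, max=clip_max)
    lower = torch.clamp(y_k + beta, min=clip_min)
    diff = y_k - x
    return torch.where(diff > beta, upper,
                       torch.where(diff < -beta, lower, x))

x = torch.tensor([0.50, 0.50, 0.50])     # original pixels
y_k = torch.tensor([0.90, 0.52, 0.10])   # current attack iterate
print(ista_shrink(x, y_k, beta=0.05))    # tensor([0.8500, 0.5000, 0.1500])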
Example No. 8
    def perturb(self, x, y):
        x, y = self._verify_and_process_inputs(x, y)
        delta = torch.zeros_like(x)
        g = torch.zeros_like(x)
        delta = nn.Parameter(delta)

        for i in range(self.nb_iter):
            if delta.grad is not None:
                delta.grad.detach_()
                delta.grad.zero_()

            imgadv = x + delta
            diverse_x = self.input_diversity(imgadv)
            outputs = self.predict(diverse_x)
            loss = self.loss_fn(outputs, y)
            if self.targeted:
                loss = -loss
            loss.backward()

            # Main Difference between DIM Attack and this
            delta.grad.data = F.conv2d(delta.grad.data,
                                       self.stack_kernel,
                                       stride=1,
                                       padding=7)
            g = self.decay_factor * g + normalize_by_pnorm(delta.grad.data,
                                                           p=1)
            # according to the paper it should be .sum(), but in their
            #   implementations (both cleverhans and the link from the paper)
            #   it is .mean(), but actually it shouldn't matter
            if self.attack_ball == 'Linf':
                delta.data += self.eps_iter * torch.sign(g)
                delta.data = clamp(delta.data, min=-self.eps, max=self.eps)
                delta.data = clamp(
                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
            elif self.attack_ball == 'L2':
                delta.data += self.eps_iter * normalize_by_pnorm(g, p=2)
                delta.data *= clamp(
                    (self.eps * normalize_by_pnorm(delta.data, p=2) /
                     delta.data),
                    max=1.)
                delta.data = clamp(
                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
            else:
                error = "Only ord = inf and ord = 2 have been implemented"
                raise NotImplementedError(error)

        rval = x + delta.data
        return rval
Example No. 9
def rand_init_delta(delta, x, ord, eps, clip_min, clip_max):
    # TODO: currently only one way of "uniform" sampling is considered
    # for Linf, there are 3 ways:
    #   1) true uniform sampling: first calculate the rectangle, then sample
    #   2) uniform in eps box then truncate using data domain (implemented)
    #   3) uniform sample in data domain then truncate with eps box
    # for L2, true uniform sampling is hard, since it requires uniform sampling
    #   inside an intersection of a cube and a ball, so there are 2 ways:
    #   1) uniform sample in the data domain, then truncate using the L2 ball
    #       (implemented)
    #   2) uniform sample in the L2 ball, then truncate using the data domain

    if isinstance(eps, torch.Tensor):
        assert len(eps) == len(delta)

    if ord == np.inf:
        delta.data.uniform_(-1, 1)
        delta.data = batch_multiply(eps, delta.data)
    elif ord == 2:
        delta.data.uniform_(0, 1)
        delta.data = delta.data - x
        delta.data = clamp_by_pnorm(delta.data, ord, eps)
    else:
        error = "Only ord = inf and ord = 2 have been implemented"
        raise NotImplementedError(error)

    delta.data = clamp(x + delta.data, min=clip_min, max=clip_max) - x
    return delta.data
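
A short usage sketch for rand_init_delta; the setup below is illustrative only. delta is expected to already be a tensor of the same shape as x (e.g. an nn.Parameter) and is filled in place:

import numpy as np
import torch
import torch.nn as nn

x = torch.rand(8, 3, 32, 32)                            # a batch of images in [0, 1]
delta = nn.Parameter(torch.zeros_like(x))
rand_init_delta(delta, x, np.inf, 8.0 / 255, 0.0, 1.0)
assert (delta.data.abs() <= 8.0 / 255 + 1e-6).all()     # stays inside the Linf ball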
Example No. 10
    def _get_arctanh_x(self, x):
        # Carlini's original implementation uses a slightly different formula
        # because the image space is [-0.5, 0.5] instead of
        # [clip_min, clip_max]
        result = clamp((x - self.clip_min) / (self.clip_max - self.clip_min),
                       min=0., max=1.) * 2 - 1
        return torch_arctanh(result * ONE_MINUS_EPS)
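
The arctanh change of variables above maps the box [clip_min, clip_max] to an unconstrained space so the CW optimizer can search freely; applying tanh maps back. A round-trip sketch under the assumption that ONE_MINUS_EPS is a constant just below 1 (e.g. 1 - 1e-6) and torch_arctanh is the inverse hyperbolic tangent:

import torch

ONE_MINUS_EPS = 1.0 - 1e-6                # assumed illustrative value
clip_min, clip_max = 0.0, 1.0

x = torch.tensor([0.0, 0.25, 1.0])
scaled = (x - clip_min) / (clip_max - clip_min) * 2 - 1     # now in [-1, 1]
w = torch.atanh(scaled * ONE_MINUS_EPS)                     # unconstrained variable
x_back = ((torch.tanh(w) / ONE_MINUS_EPS + 1) / 2
          * (clip_max - clip_min) + clip_min)               # recover x
assert torch.allclose(x, x_back, atol=1e-4)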
Example No. 11
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.
        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """

        x, y = self._verify_and_process_inputs(x, y)
        xadv = x.requires_grad_()
        outputs = self.predict(xadv)

        loss = self.loss_fn(outputs, y)
        if self.targeted:
            loss = -loss
        loss.backward()
        grad_sign = xadv.grad.detach().sign()

        if self.getAtkpn:
            xadv = grad_sign
        else:
            xadv = xadv + self.eps * grad_sign
            xadv = clamp(xadv, self.clip_min, self.clip_max)

        return xadv.detach()
    def perturb(self, source, guide, delta=None):
        """
        Given source, returns their adversarial counterparts
        with representations close to that of the guide.

        :param source: input tensor which we want to perturb.
        :param guide: targeted input.
        :param delta: tensor containing the random initialization.
        :return: tensor containing perturbed inputs.
        """
        # Initialization
        if delta is None:
            delta = torch.zeros_like(source)
            if self.rand_init:
                delta = delta.uniform_(-self.eps, self.eps)
        else:
            delta = delta.detach()

        delta.requires_grad_()

        source = replicate_input(source)
        guide = replicate_input(guide)
        guide_ftr = self.predict(guide).detach()

        xadv = perturb_iterative(source, guide_ftr, self.predict,
                                 self.nb_iter, eps_iter=self.eps_iter,
                                 loss_fn=self.loss_fn, minimize=True,
                                 ord=np.inf, eps=self.eps,
                                 clip_min=self.clip_min,
                                 clip_max=self.clip_max,
                                 delta_init=delta)

        xadv = clamp(xadv, self.clip_min, self.clip_max)

        return xadv.data
Example No. 13
    def perturb(self, x, y=None):
        x, y = self._verify_and_process_inputs(x, y)
        xadv = x
        batch_size = x.shape[0]
        dim_x = int(np.prod(x.shape[1:]))
        max_iters = int(dim_x * self.gamma / 2)
        search_space = x.new_ones(batch_size, dim_x).int()
        curr_step = 0
        yadv = self._get_predicted_label(xadv)

        # Algorithm 1
        while ((y != yadv).any() and curr_step < max_iters):

            grads_target, grads_other = self._compute_forward_derivative(
                xadv, y)

            # Algorithm 3
            p1, p2, valid = self._saliency_map(search_space, grads_target,
                                               grads_other, y)

            cond = (y != yadv) & valid

            self._update_search_space(search_space, p1, p2, cond)

            xadv = self._modify_xadv(xadv, batch_size, cond, p1, p2)
            yadv = self._get_predicted_label(xadv)

            curr_step += 1

        xadv = clamp(xadv, min=self.clip_min, max=self.clip_max)
        return xadv
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.

        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """
        x, y = self._verify_and_process_inputs(x, y)

        delta = torch.zeros_like(x)
        delta = nn.Parameter(delta)
        if self.rand_init:
            rand_init_delta(
                delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x

        rval = perturb_iterative(
            x, y, self.predict, nb_iter=self.nb_iter,
            eps=self.eps, eps_iter=self.eps_iter,
            loss_fn=self.loss_fn, minimize=self.targeted,
            ord=self.ord, clip_min=self.clip_min,
            clip_max=self.clip_max, delta_init=delta,
            sparsity=self.sparsity, eot_samples=self.eot_samples)

        return rval.data
Example No. 15
        def perturb_fool_many(self,
                              x,
                              emb,
                              indlist,
                              y=None):  # list of indices of the words to be perturbed
            """
            Given examples (x, y), returns their adversarial counterparts with
            an attack length of eps.

            :param x: input tensor.
            :param emb: embedding tensor to be perturbed.
            :param indlist: per-example list of word indices that may be
                            perturbed.
            :param y: label tensor.
                      - if None and self.targeted=False, compute y as predicted
                        labels.
                      - if self.targeted=True, then y must be the targeted labels.
            :return: tuple of (perturbed inputs, word_balance_memory,
                     loss_memory, tablistbatch, fool).
            """
            emb, y = self._verify_and_process_inputs(emb, y)  #???

            delta = torch.zeros_like(emb)
            delta = nn.Parameter(delta)
            if self.rand_init:
                rand_init_delta(delta, emb, np.inf, self.eps, self.clip_min,
                                self.clip_max)
                delta.data = clamp(emb + delta.data,
                                   min=self.clip_min,
                                   max=self.clip_max) - emb

            with torch.no_grad():
                for ba in range(delta.size()[0]):
                    for t in range(delta.size()[1]):
                        if not (t in indlist[ba]):
                            for k in range(delta.size()[2]):
                                delta[ba][t][k] = 0
                if self.ord == 0:
                    for ba in range(delta.size()[0]):
                        delta[ba] = my_proj_all(emb[ba] + delta[ba], emb[ba],
                                                indlist[ba],
                                                self.eps) - emb[ba]

            rval, word_balance_memory, loss_memory, tablistbatch, fool = perturb_iterative_fool_many(
                x,
                emb,
                indlist,
                y,
                self.predict,
                nb_iter=self.nb_iter,
                eps=self.eps,
                epscand=self.epscand,
                eps_iter=self.eps_iter,
                loss_fn=self.loss_fn,
                minimize=self.targeted,
                ord=self.ord,
                clip_min=self.clip_min,
                clip_max=self.clip_max,
                delta_init=delta,
                l1_sparsity=self.l1_sparsity,
                rayon=self.rayon)

            return rval.data, word_balance_memory, loss_memory, tablistbatch, fool
Example No. 16
    def _loss_fn(self, output, y_onehot, l2distsq, const):
        # TODO: move this out of the class and make this the default loss_fn
        #   after having targeted tests implemented
        real = (y_onehot * output).sum(dim=1)

        # TODO: make loss modular, write a loss class
        other = ((1.0 - y_onehot) * output -
                 (y_onehot * TARGET_MULT)).max(1)[0]
        # subtracting y_onehot * TARGET_MULT keeps the true label from being
        # selected by max()

        if self.targeted:
            loss1 = clamp(other - real + self.confidence, min=0.)
        else:
            loss1 = clamp(real - other + self.confidence, min=0.)
        loss2 = (l2distsq).sum()
        loss1 = torch.sum(const * loss1)
        loss = loss1 + loss2
        return loss
Example No. 17
    def _modify_xadv(self, xadv, batch_size, cond, p1, p2):
        ori_shape = xadv.shape
        xadv = xadv.view(batch_size, -1)
        for idx in range(batch_size):
            if cond[idx] != 0:
                xadv[idx, p1[idx]] += self.theta
                xadv[idx, p2[idx]] += self.theta
        xadv = clamp(xadv, min=self.clip_min, max=self.clip_max)
        xadv = xadv.view(ori_shape)
        return xadv
Example No. 18
    def _loss_fn(self, output, y_onehot, l1dist, l2distsq, const, opt=False):

        real = (y_onehot * output).sum(dim=1)
        other = ((1.0 - y_onehot) * output - (y_onehot * TARGET_MULT)).max(1)[0]

        if self.targeted:
            loss_logits = clamp(other - real + self.confidence, min=0.)
        else:
            loss_logits = clamp(real - other + self.confidence, min=0.)
        loss_logits = torch.sum(const * loss_logits)

        loss_l2 = l2distsq.sum()

        if opt:
            loss = loss_logits + loss_l2
        else:
            loss_l1 = self.beta * l1dist.sum()
            loss = loss_logits + loss_l2 + loss_l1
        return loss
Example No. 19
def test_clamp():
    def _convert_to_float(x):
        return float(x) if x is not None else None

    def _convert_to_batch_tensor(x, data):
        return x * torch.ones_like(data) if x is not None else None

    def _convert_to_single_tensor(x, data):
        return x * torch.ones_like(data[0]) if x is not None else None

    for min, max in [(-1, None), (None, 1), (-1, 1)]:
        data = 3 * torch.randn((11, 12, 13))
        case1 = clamp(data, min, max)
        case2 = clamp(data, _convert_to_float(min), _convert_to_float(max))
        case3 = clamp(data, _convert_to_batch_tensor(min, data),
                      _convert_to_batch_tensor(max, data))
        case4 = clamp(data, _convert_to_single_tensor(min, data),
                      _convert_to_single_tensor(max, data))

        assert torch.all(case1 == case2)
        assert torch.all(case2 == case3)
        assert torch.all(case3 == case4)
Example No. 20
    def _loss_fn_spatial(self, grid, x, y, const, grid_ori):
        imgs = x.clone()
        grid = torch.from_numpy(grid.reshape(grid_ori.shape)).float().to(
            x.device).requires_grad_()
        delta = grid_ori - grid

        adv_img = F.grid_sample(imgs, grid)
        output = self.predict(adv_img)
        real = (y * output).sum(dim=1)
        other = ((1.0 - y) * output - (y * TARGET_MULT)).max(1)[0]
        if self.targeted:
            loss1 = clamp(other - real + self.confidence, min=0.)
        else:
            loss1 = clamp(real - other + self.confidence, min=0.)
        loss2 = self.initial_const * (torch.sqrt(
            (((delta[:, :, 1:] - delta[:, :, :-1] + 1e-10)**2)).view(
                delta.shape[0], -1).sum(1)) + torch.sqrt(
                    ((delta[:, 1:, :] - delta[:, :-1, :] + 1e-10)**2).view(
                        delta.shape[0], -1).sum(1)))
        loss = torch.sum(loss1) + torch.sum(loss2)
        loss.backward()
        grad_ret = grid.grad.data.cpu().numpy().flatten().astype(float)
        grid.grad.data.zero_()
        return loss.data.cpu().numpy().astype(float), grad_ret
    def perturb(self, x, y, target_y=None):
        with ctx_noparamgrad_and_eval(self.predict):
            if self.pgdadv.targeted:
                self.target_y = target_y
                xadv = self.pgdadv.perturb(x, target_y)
                adv_pred = self.pgdadv.predict(xadv).argmax(1)
                # print((adv_pred == target_y).float().mean())
            else:
                xadv = self.pgdadv.perturb(x, y)
        # print(self.pgdadv.eps, x.shape, xadv.shape, torch.norm((x - xadv).view(x.shape[0], -1), p=float('inf'), dim=1).mean())
        unitptb, curr_eps = self._get_unitptb_and_eps(
            xadv, x, y, self.pgdadv.eps)
        xadv = clamp(x + batch_multiply(curr_eps, unitptb),
                     min=self.pgdadv.clip_min, max=self.pgdadv.clip_max)
        # print('')
        return xadv
Example No. 22
def whitebox_pgd(args, image, target, model, normalize=None):
    adversary = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
        nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
        targeted=False)
    adv_image = adversary.perturb(image, target)
    print("Target is %d" % (target))
    pred = model(adv_image)
    out = pred.max(1, keepdim=True)[1]  # get the index of the max log-probability
    print("Adv Target is %d" % (out))
    clean_image = image[0].detach()
    adv_image = adv_image[0].detach()
    if args.comet:
        plot_image_to_comet(args, clean_image, "clean.png")
        plot_image_to_comet(args, adv_image, "Adv.png")
    return pred, clamp(clean_image - adv_image, 0., 1.)
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.

        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """
        x, y = self._verify_and_process_inputs(x, y)
        shape, flat_x = _flatten(x)
        data_shape = tuple(shape[1:])

        def f(x):
            new_shape = (x.shape[0], ) + data_shape
            input = x.reshape(new_shape)
            return self.predict(input)

        f_nes = NESWrapper(f, nb_samples=self.nb_samples, fd_eta=self.fd_eta)

        delta = torch.zeros_like(flat_x)
        delta = nn.Parameter(delta)
        if self.rand_init:
            rand_init_delta(delta, flat_x, self.ord, self.eps, self.clip_min,
                            self.clip_max)
            delta.data = clamp(flat_x + delta.data,
                               min=self.clip_min,
                               max=self.clip_max) - flat_x

        rval = perturb_iterative(flat_x,
                                 y,
                                 f_nes,
                                 nb_iter=self.nb_iter,
                                 eps=self.eps,
                                 eps_iter=self.eps_iter,
                                 loss_fn=self.loss_fn,
                                 minimize=self.targeted,
                                 ord=self.ord,
                                 clip_min=self.clip_min,
                                 clip_max=self.clip_max,
                                 delta_init=delta,
                                 l1_sparsity=None)

        return rval.data.reshape(shape)
Example No. 24
def single_white_box_generator(args, image, target, model, G):
    epsilon = 0.5
    # Create noise vector
    x = image
    opt = optim.SGD(G.parameters(), lr=1e-2)

    print("Target is %d" % (target))
    for t in range(args.PGD_steps):
        delta, kl_div = G(x)
        delta = delta.view(delta.size(0), 1, 28, 28)
        delta.data.clamp_(-epsilon, epsilon)
        delta.data = clamp(x.data + delta.data, 0., 1.) - x.data
        pred = model(x.detach() + delta)
        out = pred.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        loss = -nn.CrossEntropyLoss(reduction="sum")(pred, target)
        if args.comet:
            args.experiment.log_metric("Whitebox CE loss", loss, step=t)
        if t % 5 == 0:
            print(t, out[0][0], loss.item())
        opt.zero_grad()
        loss.backward()
        for p in G.parameters():
            p.grad.data.sign_()
        # Clipping is equivalent to projecting back onto the l_\infty ball
        # This technique is known as projected gradient descent (PGD)
        # delta.data.clamp_(-epsilon, epsilon)
        # delta.data = clamp(x.data + delta.data,0.,1.) - x.data
        opt.step()
        if out != target:
            print(t, out[0][0], loss.item())
            break
    if args.comet:
        if not args.mnist:
            clean_image = (image)[0].detach().cpu().numpy().transpose(1, 2, 0)
            adv_image = (x + delta)[0].detach().cpu().numpy().transpose(
                1, 2, 0)
            delta_image = (delta)[0].detach().cpu().numpy().transpose(1, 2, 0)
        else:
            clean_image = (image)[0].detach()
            adv_image = (x + delta)[0].detach()
            delta_image = (delta)[0].detach()
        plot_image_to_comet(args, clean_image, "clean.png")
        plot_image_to_comet(args, adv_image, "Adv.png")
        plot_image_to_comet(args, delta_image, "delta.png")
    return out, delta
Example No. 25
    def _rescale_x_score(self, predict, x, y, ori, best_dist):
        x = torch.stack(x)
        x = self._revert_rescale(x)

        batch_logits = predict(x)
        scores = nn.Softmax(dim=1)(batch_logits)[:, y]

        if not self.comply_with_foolbox:
            x = clamp(x, self.clip_min, self.clip_max)
            batch_logits = predict(x)

        _, bests = torch.max(batch_logits, dim=1)
        best_img = None
        for ii in range(len(bests)):
            curr_dist = torch.sum((x[ii] - ori)**2)
            if (is_successful(int(bests[ii]), y, self.targeted)
                    and curr_dist < best_dist):
                best_img = x[ii]
                best_dist = curr_dist
        scores = nn.Softmax(dim=1)(batch_logits)[:, y]
        return scores, best_img, best_dist
Example No. 26
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.
        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """

        x, y = self._verify_and_process_inputs(x, y)

        delta = torch.zeros_like(x)
        delta = nn.Parameter(delta)
        if self.rand_init:
            rand_init_delta(
                delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x

        rval, delta = masked_perturb_iterative(
            x, y, self.predict, nb_iter=self.nb_iter,
            eps=self.eps, eps_iter=self.eps_iter,
            loss_fn=self.loss_fn, minimize=self.targeted,
            ord=self.ord, clip_min=self.clip_min,
            clip_max=self.clip_max, delta_init=delta,
            l1_sparsity=self.l1_sparsity,
            mask_steps=self.mask_steps,
            device=self.device
        )

        file_name = self.experiment_name + "_iter" + str(self.nb_iter) + "_delta"
        with open(file_name + '.npy', 'wb') as f:
            np.save(f, delta.detach().cpu().numpy())

        return rval.data
    def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
        unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
        adv_logits = self.predict(xadv)
        logit_margin = elementwise_margin(adv_logits, y)
        ones = torch.ones_like(y).float()
        # maxeps = self.maxeps * ones
        maxeps = torch.norm((xadv - x).view(x.shape[0], -1),
                            p=self.pgdadv.ord, dim=1)

        adv_pred = adv_logits.argmax(1)
        # print(1 - (adv_pred == y).float().mean())
        # print(maxeps.min(), maxeps.max())
        pred = adv_pred.clone()
        i = 0
        # print(i, self.pgdadv.eps, float((adv_pred == pred).float().mean()), float((pred == self.target_y).float().mean()), float(maxeps.min()), float(maxeps.max()))
        while i < 10:
            if self.pgdadv.targeted:
                unsuccessful_adv_idx = ((adv_pred != self.target_y)
                                        & (pred != self.target_y))
                if not unsuccessful_adv_idx.any():
                    break
            else:
                unsuccessful_adv_idx = (adv_pred == y) & (pred == y)
            maxeps[unsuccessful_adv_idx] *= 1.5
            maxeps_ = maxeps[unsuccessful_adv_idx]
            unitptb_ = unitptb[unsuccessful_adv_idx]
            x_ = x[unsuccessful_adv_idx]

            x_ = clamp(x_ + batch_multiply(maxeps_, unitptb_),
                       min=0., max=1.)
            pred[unsuccessful_adv_idx] = self.predict(x_).argmax(1)
            i += 1
            # print(i, self.pgdadv.eps, float((adv_pred == pred).float().mean()), float((pred == self.target_y).float().mean()), float(maxeps.min()), float(maxeps.max()))
        # print(logit_margin)
        curr_eps = bisection_search(
            maxeps * 0.5, unitptb, self.predict, x, y, elementwise_margin,
            logit_margin, maxeps, self.num_search_steps)
        if self.pgdadv.targeted:
            curr_eps[pred != self.target_y] = np.inf
        return unitptb, curr_eps
Example No. 28
def bisection_search(cur_eps,
                     ptb,
                     model,
                     data,
                     label,
                     fn_margin,
                     margin_init,
                     maxeps,
                     num_steps,
                     cur_min=None,
                     clip_min=0.,
                     clip_max=1.):

    assert torch.all(cur_eps <= maxeps)

    margin = margin_init

    if cur_min is None:
        cur_min = torch.zeros_like(margin)
    cur_max = maxeps.clone().detach()

    for ii in range(num_steps):
        cur_min = torch.max((margin < 0).float() * cur_eps, cur_min)
        cur_max = torch.min(
            ((margin < 0).float() * maxeps + (margin >= 0).float() * cur_eps),
            cur_max)

        cur_eps = (cur_min + cur_max) / 2
        margin = fn_margin(
            model(
                clamp(data + batch_multiply(cur_eps, ptb),
                      min=clip_min,
                      max=clip_max)), label)

    assert torch.all(cur_eps <= maxeps)

    return cur_eps
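
Each step halves the per-example interval [cur_min, cur_max], raising cur_min when the margin is negative and lowering cur_max otherwise, so cur_eps converges to the point where the margin crosses zero. A toy sketch (toy_model, toy_margin, and all values below are illustrative only) where that crossing happens at a perturbation size of 0.3:

import torch

def toy_model(x):
    # 2-class logits; class 0's score grows with the mean perturbation size
    score = x.view(x.shape[0], -1).mean(dim=1)
    return torch.stack([score, 0.3 * torch.ones_like(score)], dim=1)

def toy_margin(logits, label):
    return logits[:, 0] - logits[:, 1]   # sign flips at perturbation size 0.3

data = torch.zeros(1, 4)                 # clean input
ptb = torch.ones(1, 4)                   # unit perturbation direction
maxeps = torch.tensor([1.0])
cur_eps = maxeps * 0.5
margin0 = toy_margin(toy_model(data + cur_eps * ptb), None)

eps = bisection_search(cur_eps, ptb, toy_model, data, None, toy_margin,
                       margin0, maxeps, num_steps=20)
print(eps)                               # converges to ~0.3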
def perturb_iterative(xvar,
                      yvar,
                      predict1,
                      predict2,
                      nb_iter,
                      eps,
                      eps_iter,
                      loss_fn,
                      delta_init=None,
                      minimize=False,
                      ord=np.inf,
                      clip_min=0.0,
                      clip_max=1.0,
                      l1_sparsity=None):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict1: first forward pass function.
    :param predict2: second forward pass function applied to the output of
        predict1; if None, only predict1 is used.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
                  - if None, then perform regular L1 projection.
                  - if float value, then perform sparse L1 descent from
                    Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        if predict2 is not None:
            outputs = predict2(predict1(xvar + delta))
        else:
            outputs = predict1(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)

        elif ord == 1:
            grad = delta.grad.data
            abs_grad = torch.abs(grad)

            batch_size = grad.size(0)
            view = abs_grad.view(batch_size, -1)
            view_size = view.size(1)
            if l1_sparsity is None:
                vals, idx = view.topk(1)
            else:
                vals, idx = view.topk(
                    int(np.round((1 - l1_sparsity) * view_size)))

            out = torch.zeros_like(view).scatter_(1, idx, vals)
            out = out.view_as(grad)
            grad = grad.sign() * (out > 0).float()
            grad = normalize_by_pnorm(grad, p=1)
            delta.data = delta.data + batch_multiply(eps_iter, grad)

            delta.data = batch_l1_proj(delta.data.cpu(), eps)
            if xvar.is_cuda:
                delta.data = delta.data.cuda()
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)
        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
Example No. 30
    def _get_arctanh_x(self, x):
        result = clamp((x - self.clip_min) / (self.clip_max - self.clip_min),
                       min=0., max=1.) * 2 - 1
        return torch_arctanh(result * ONE_MINUS_EPS)