def _loss_fn(self, output, y_onehot, linfdistsq, const):
    # TODO: move this out of the class and make this the default loss_fn
    #       after having targeted tests implemented
    real = (y_onehot * output).sum(dim=1)

    # TODO: make loss modular, write a loss class
    other, label_o = ((1.0 - y_onehot) * output -
                      (y_onehot * TARGET_MULT)).max(1)
    label_0 = F.one_hot(label_o, num_classes=self.num_classes)
    # - (y_onehot * TARGET_MULT) is for the true label not to be selected

    if self.adaptive_con:
        c = c_con
    else:
        c = self.confidence

    if self.targeted:
        loss1 = clamp(other - real + c, min=0.)
    else:
        loss1 = clamp(real - other + c, min=0.)

    # adaptive loss for evading evidence detector
    if self.adaptive_evi:
        loss1 += clamp(self.evi_train_median - output.logsumexp(dim=1), min=0.)
    if self.adaptive_con:
        l = F.softmax(output, dim=1) * label_0
        loss1 = clamp(self.con_train_median - l.sum(dim=1), min=0.)

    loss2 = (linfdistsq).sum()
    loss1 = torch.sum(const * loss1)
    loss = loss1 + loss2
    return loss
def _loss_fn(self, output, y_onehot, l2distsq, const):
    # TODO: move this out of the class and make this the default loss_fn
    #       after having targeted tests implemented
    real = (y_onehot * output).sum(dim=1)

    # TODO: make loss modular, write a loss class
    other = ((1.0 - y_onehot) * output - (y_onehot * TARGET_MULT)).max(1)[0]
    # - (y_onehot * TARGET_MULT) is for the true label not to be selected
    if self.targeted:
        loss1 = clamp(other - real + self.confidence, min=0.)
        threshold_loss = clamp(self.threshold - real, min=0.)
        # threshold_loss = clamp(real - self.threshold, min=0.)
    else:
        loss1 = clamp(real - other + self.confidence, min=0.)
        threshold_loss = clamp(self.threshold - other, min=0.)

    loss2 = (l2distsq).sum()
    # const = 0.001
    loss1 = torch.sum(const * loss1)
    threshold_loss = torch.sum(const * threshold_loss)
    # print('const ', const)
    print('dis: {:.2f}, loss1: {:.2f}, threshold_loss: {:.2f}'.format(
        loss2.item(), loss1.item(), threshold_loss.item()))
    loss = loss2 + threshold_loss
    # loss = loss1 + loss2 + threshold_loss
    return loss
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)

    delta = torch.zeros_like(x)
    g = torch.zeros_like(x)
    delta = nn.Parameter(delta)

    for i in range(self.nb_iter):
        if delta.grad is not None:
            delta.grad.detach_()
            delta.grad.zero_()

        imgadv = x + delta
        outputs = self.predict(imgadv)
        loss = self.loss_fn(outputs, y)
        if self.targeted:
            loss = -loss
        loss.backward()

        g = self.decay_factor * g + normalize_by_pnorm(
            delta.grad.data, p=1)
        # according to the paper it should be .sum(), but in their
        #   implementations (both cleverhans and the link from the paper)
        #   it is .mean(), but actually it shouldn't matter
        if self.ord == np.inf:
            delta.data += self.eps_iter * torch.sign(g)
            delta.data = clamp(
                delta.data, min=-self.eps, max=self.eps)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x
        elif self.ord == 2:
            delta.data += self.eps_iter * normalize_by_pnorm(g, p=2)
            delta.data *= clamp(
                (self.eps * normalize_by_pnorm(delta.data, p=2) /
                 delta.data),
                max=1.)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

    rval = x + delta.data
    return rval
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size per iteration.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf or 2).
    :param clip_min: (optional float) minimum value per input dimension.
    :param clip_max: (optional float) maximum value per input dimension.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        outputs = predict(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
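# Illustrative usage sketch (added here, not part of the original code): a
# minimal untargeted Linf-PGD call through perturb_iterative, assuming `model`
# is a classifier nn.Module and x, y are a batch of inputs/labels in [0, 1].
# The hyperparameter values below are placeholders, not values from the source.
def _example_linf_pgd(model, x, y):
    loss_fn = nn.CrossEntropyLoss(reduction="sum")
    delta_init = torch.zeros_like(x)  # no random start in this sketch
    return perturb_iterative(
        x, y, model, nb_iter=40, eps=0.3, eps_iter=0.01, loss_fn=loss_fn,
        delta_init=delta_init, minimize=False, ord=np.inf,
        clip_min=0.0, clip_max=1.0)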
def _cwl2_loss(self, output, y_onehot):
    real = (y_onehot * output).sum(dim=1)
    other = ((1.0 - y_onehot) * output - (y_onehot * TARGET_MULT)).max(1)[0]
    # - (y_onehot * TARGET_MULT) is for the true label not to be selected
    if self.targeted:
        loss = clamp(other - real + self.confidence, min=0.)
    else:
        loss = clamp(real - other + self.confidence, min=0.)
    return loss
def _fast_iterative_shrinkage_thresholding(self, x, delta):
    zt = self.global_step / (self.global_step + 3)

    upper = clamp(delta - self.beta, max=self.clip_max)
    lower = clamp(delta + self.beta, min=self.clip_min)

    diff = delta - x
    cond1 = (diff > self.beta).float()
    cond2 = (torch.abs(diff) <= self.beta).float()
    cond3 = (diff < -self.beta).float()

    newimg = (cond1 * upper) + (cond2 * x) + (cond3 * lower)
    adv = newimg + (zt * (newimg - x))
    return adv
def _fast_iterative_shrinkage_thresholding(self, x, yy_k, xx_k):
    zt = self.global_step / (self.global_step + 3)

    upper = clamp(yy_k - self.beta, max=self.clip_max)
    lower = clamp(yy_k + self.beta, min=self.clip_min)

    diff = yy_k - x
    cond1 = (diff > self.beta).float()
    cond2 = (torch.abs(diff) <= self.beta).float()
    cond3 = (diff < -self.beta).float()

    xx_k_p_1 = (cond1 * upper) + (cond2 * x) + (cond3 * lower)
    yy_k.data = xx_k_p_1 + (zt * (xx_k_p_1 - xx_k))
    return yy_k, xx_k_p_1
def perturb(self, x, y):
    x, y = self._verify_and_process_inputs(x, y)

    delta = torch.zeros_like(x)
    g = torch.zeros_like(x)
    delta = nn.Parameter(delta)

    for i in range(self.nb_iter):
        if delta.grad is not None:
            delta.grad.detach_()
            delta.grad.zero_()

        imgadv = x + delta
        diverse_x = self.input_diversity(imgadv)
        outputs = self.predict(diverse_x)
        loss = self.loss_fn(outputs, y)
        if self.targeted:
            loss = -loss
        loss.backward()

        # Main difference between the DIM attack and this one:
        # smooth the gradient with the translation-invariant kernel
        delta.grad.data = F.conv2d(delta.grad.data, self.stack_kernel,
                                   stride=1, padding=7)

        g = self.decay_factor * g + normalize_by_pnorm(delta.grad.data, p=1)
        # according to the paper it should be .sum(), but in their
        #   implementations (both cleverhans and the link from the paper)
        #   it is .mean(), but actually it shouldn't matter
        if self.attack_ball == 'Linf':
            delta.data += self.eps_iter * torch.sign(g)
            delta.data = clamp(delta.data, min=-self.eps, max=self.eps)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x
        elif self.attack_ball == 'L2':
            delta.data += self.eps_iter * normalize_by_pnorm(g, p=2)
            delta.data *= clamp(
                (self.eps * normalize_by_pnorm(delta.data, p=2) /
                 delta.data),
                max=1.)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x
        else:
            error = "Only attack_ball = 'Linf' and attack_ball = 'L2' have been implemented"
            raise NotImplementedError(error)

    rval = x + delta.data
    return rval
def rand_init_delta(delta, x, ord, eps, clip_min, clip_max):
    # TODO: Currently only considered one way of "uniform" sampling
    # for Linf, there are 3 ways:
    #   1) true uniform sampling by first calculate the rectangle then sample
    #   2) uniform in eps box then truncate using data domain (implemented)
    #   3) uniform sample in data domain then truncate with eps box
    # for L2, true uniform sampling is hard, since it requires uniform sampling
    #   inside a intersection of cube and ball, so there are 2 ways:
    #   1) uniform sample in the data domain, then truncate using the L2 ball
    #      (implemented)
    #   2) uniform sample in the L2 ball, then truncate using the data domain

    if isinstance(eps, torch.Tensor):
        assert len(eps) == len(delta)

    if ord == np.inf:
        delta.data.uniform_(-1, 1)
        delta.data = batch_multiply(eps, delta.data)
    elif ord == 2:
        delta.data.uniform_(0, 1)
        delta.data = delta.data - x
        delta.data = clamp_by_pnorm(delta.data, ord, eps)
    else:
        error = "Only ord = inf and ord = 2 have been implemented"
        raise NotImplementedError(error)

    delta.data = clamp(x + delta.data, min=clip_min, max=clip_max) - x
    return delta.data
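# Illustrative usage sketch (assumption, not from the source): drawing a random
# Linf start with rand_init_delta and re-projecting onto the valid data range,
# mirroring how the perturb() methods in this file initialize delta.
def _example_rand_start(x, eps=8. / 255):
    delta = nn.Parameter(torch.zeros_like(x))
    rand_init_delta(delta, x, np.inf, eps, 0.0, 1.0)
    delta.data = clamp(x + delta.data, min=0.0, max=1.0) - x
    return delta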
def _get_arctanh_x(self, x):
    # Carlini's original implementation uses a slightly different formula
    # because the image space is [-0.5, 0.5] instead of [clip_min, clip_max]
    result = clamp((x - self.clip_min) / (self.clip_max - self.clip_min),
                   min=0., max=1.) * 2 - 1
    return torch_arctanh(result * ONE_MINUS_EPS)
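# Worked sketch of the change of variables (an illustration added here, not
# original code): _get_arctanh_x maps x in [clip_min, clip_max] to an
# unconstrained tanh-space variable w, and tanh maps it back, so the attack can
# optimize w freely while the reconstructed image stays inside the box.
def _example_tanh_round_trip(x, clip_min=0.0, clip_max=1.0):
    scaled = clamp((x - clip_min) / (clip_max - clip_min), min=0., max=1.)
    w = torch_arctanh((scaled * 2 - 1) * ONE_MINUS_EPS)
    x_rec = (torch.tanh(w) + 1) / 2 * (clip_max - clip_min) + clip_min
    # x_rec is approximately x, up to the ONE_MINUS_EPS shrink that keeps
    # arctanh away from +/-1
    return x_rec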
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)

    xadv = x.requires_grad_()
    outputs = self.predict(xadv)

    loss = self.loss_fn(outputs, y)
    if self.targeted:
        loss = -loss
    loss.backward()
    grad_sign = xadv.grad.detach().sign()

    if self.getAtkpn:
        xadv = grad_sign
    else:
        xadv = xadv + self.eps * grad_sign
    xadv = clamp(xadv, self.clip_min, self.clip_max)

    return xadv.detach()
def perturb(self, source, guide, delta=None):
    """
    Given source, returns their adversarial counterparts
    with representations close to that of the guide.

    :param source: input tensor which we want to perturb.
    :param guide: targeted input.
    :param delta: tensor containing the random initialization.
    :return: tensor containing perturbed inputs.
    """
    # Initialization
    if delta is None:
        delta = torch.zeros_like(source)
        if self.rand_init:
            delta = delta.uniform_(-self.eps, self.eps)
    else:
        delta = delta.detach()

    delta.requires_grad_()

    source = replicate_input(source)
    guide = replicate_input(guide)
    guide_ftr = self.predict(guide).detach()

    xadv = perturb_iterative(source, guide_ftr, self.predict,
                             self.nb_iter, eps_iter=self.eps_iter,
                             loss_fn=self.loss_fn, minimize=True,
                             ord=np.inf, eps=self.eps,
                             clip_min=self.clip_min,
                             clip_max=self.clip_max,
                             delta_init=delta)

    xadv = clamp(xadv, self.clip_min, self.clip_max)

    return xadv.data
def perturb(self, x, y=None):
    x, y = self._verify_and_process_inputs(x, y)
    xadv = x
    batch_size = x.shape[0]
    dim_x = int(np.prod(x.shape[1:]))
    max_iters = int(dim_x * self.gamma / 2)
    search_space = x.new_ones(batch_size, dim_x).int()
    curr_step = 0
    yadv = self._get_predicted_label(xadv)

    # Algorithm 1
    while ((y != yadv).any() and curr_step < max_iters):
        grads_target, grads_other = self._compute_forward_derivative(xadv, y)

        # Algorithm 3
        p1, p2, valid = self._saliency_map(
            search_space, grads_target, grads_other, y)

        cond = (y != yadv) & valid

        self._update_search_space(search_space, p1, p2, cond)

        xadv = self._modify_xadv(xadv, batch_size, cond, p1, p2)
        yadv = self._get_predicted_label(xadv)

        curr_step += 1

    xadv = clamp(xadv, min=self.clip_min, max=self.clip_max)
    return xadv
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)

    delta = torch.zeros_like(x)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(
            delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
        delta.data = clamp(
            x + delta.data, min=self.clip_min, max=self.clip_max) - x

    rval = perturb_iterative(
        x, y, self.predict, nb_iter=self.nb_iter,
        eps=self.eps, eps_iter=self.eps_iter,
        loss_fn=self.loss_fn, minimize=self.targeted,
        ord=self.ord, clip_min=self.clip_min,
        clip_max=self.clip_max, delta_init=delta,
        sparsity=self.sparsity, eot_samples=self.eot_samples)

    return rval.data
def perturb_fool_many(self, x, emb, indlist, y=None):
    # indlist: list of indices of the words to be perturbed
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    emb, y = self._verify_and_process_inputs(emb, y)  # ???

    delta = torch.zeros_like(emb)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(delta, emb, np.inf, self.eps, self.clip_min,
                        self.clip_max)
        delta.data = clamp(emb + delta.data, min=self.clip_min,
                           max=self.clip_max) - emb

    # zero out the perturbation for word positions that are not in indlist
    with torch.no_grad():
        for ba in range(delta.size()[0]):
            for t in range(delta.size()[1]):
                if not (t in indlist[ba]):
                    for k in range(delta.size()[2]):
                        delta[ba][t][k] = 0
        if self.ord == 0:
            for ba in range(delta.size()[0]):
                delta[ba] = my_proj_all(emb[ba] + delta[ba], emb[ba],
                                        indlist[ba], self.eps) - emb[ba]

    rval, word_balance_memory, loss_memory, tablistbatch, fool = \
        perturb_iterative_fool_many(
            x, emb, indlist, y, self.predict,
            nb_iter=self.nb_iter, eps=self.eps, epscand=self.epscand,
            eps_iter=self.eps_iter, loss_fn=self.loss_fn,
            minimize=self.targeted, ord=self.ord,
            clip_min=self.clip_min, clip_max=self.clip_max,
            delta_init=delta, l1_sparsity=self.l1_sparsity,
            rayon=self.rayon)

    return rval.data, word_balance_memory, loss_memory, tablistbatch, fool
def _loss_fn(self, output, y_onehot, l2distsq, const):
    # TODO: move this out of the class and make this the default loss_fn
    #       after having targeted tests implemented
    real = (y_onehot * output).sum(dim=1)

    # TODO: make loss modular, write a loss class
    other = ((1.0 - y_onehot) * output - (y_onehot * TARGET_MULT)).max(1)[0]
    # - (y_onehot * TARGET_MULT) is for the true label not to be selected
    if self.targeted:
        loss1 = clamp(other - real + self.confidence, min=0.)
    else:
        loss1 = clamp(real - other + self.confidence, min=0.)
    loss2 = (l2distsq).sum()
    loss1 = torch.sum(const * loss1)
    loss = loss1 + loss2
    return loss
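# Tiny numeric illustration (added, not original code): with confidence = 0 and
# an untargeted attack, the hinge term above is positive until some wrong-class
# logit exceeds the true-class logit. Values below are made up for the example.
def _example_cw_hinge():
    output = torch.tensor([[2.0, 1.0, 0.5]])    # logits for a single example
    y_onehot = torch.tensor([[1.0, 0.0, 0.0]])  # true class is index 0
    real = (y_onehot * output).sum(dim=1)                            # 2.0
    other = ((1.0 - y_onehot) * output -
             (y_onehot * TARGET_MULT)).max(1)[0]                     # 1.0
    return clamp(real - other + 0., min=0.)                          # tensor([1.0])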
def _modify_xadv(self, xadv, batch_size, cond, p1, p2):
    ori_shape = xadv.shape
    xadv = xadv.view(batch_size, -1)
    for idx in range(batch_size):
        if cond[idx] != 0:
            xadv[idx, p1[idx]] += self.theta
            xadv[idx, p2[idx]] += self.theta
    xadv = clamp(xadv, min=self.clip_min, max=self.clip_max)
    xadv = xadv.view(ori_shape)
    return xadv
def _loss_fn(self, output, y_onehot, l1dist, l2distsq, const, opt=False):
    real = (y_onehot * output).sum(dim=1)
    other = ((1.0 - y_onehot) * output - (y_onehot * TARGET_MULT)).max(1)[0]

    if self.targeted:
        loss_logits = clamp(other - real + self.confidence, min=0.)
    else:
        loss_logits = clamp(real - other + self.confidence, min=0.)
    loss_logits = torch.sum(const * loss_logits)

    loss_l2 = l2distsq.sum()

    if opt:
        loss = loss_logits + loss_l2
    else:
        loss_l1 = self.beta * l1dist.sum()
        loss = loss_logits + loss_l2 + loss_l1
    return loss
def test_clamp():
    def _convert_to_float(x):
        return float(x) if x is not None else None

    def _convert_to_batch_tensor(x, data):
        return x * torch.ones_like(data) if x is not None else None

    def _convert_to_single_tensor(x, data):
        return x * torch.ones_like(data[0]) if x is not None else None

    for min, max in [(-1, None), (None, 1), (-1, 1)]:
        data = 3 * torch.randn((11, 12, 13))

        case1 = clamp(data, min, max)
        case2 = clamp(data, _convert_to_float(min), _convert_to_float(max))
        case3 = clamp(data, _convert_to_batch_tensor(min, data),
                      _convert_to_batch_tensor(max, data))
        case4 = clamp(data, _convert_to_single_tensor(min, data),
                      _convert_to_single_tensor(max, data))

        assert torch.all(case1 == case2)
        assert torch.all(case2 == case3)
        assert torch.all(case3 == case4)
def _loss_fn_spatial(self, grid, x, y, const, grid_ori):
    imgs = x.clone()
    grid = torch.from_numpy(
        grid.reshape(grid_ori.shape)).float().to(x.device).requires_grad_()
    delta = grid_ori - grid

    adv_img = F.grid_sample(imgs, grid)
    output = self.predict(adv_img)
    real = (y * output).sum(dim=1)
    other = ((1.0 - y) * output - (y * TARGET_MULT)).max(1)[0]
    if self.targeted:
        loss1 = clamp(other - real + self.confidence, min=0.)
    else:
        loss1 = clamp(real - other + self.confidence, min=0.)
    loss2 = self.initial_const * (
        torch.sqrt(
            (((delta[:, :, 1:] - delta[:, :, :-1] + 1e-10) ** 2)).view(
                delta.shape[0], -1).sum(1)) +
        torch.sqrt(
            ((delta[:, 1:, :] - delta[:, :-1, :] + 1e-10) ** 2).view(
                delta.shape[0], -1).sum(1)))
    loss = torch.sum(loss1) + torch.sum(loss2)
    loss.backward()

    grad_ret = grid.grad.data.cpu().numpy().flatten().astype(float)
    grid.grad.data.zero_()
    return loss.data.cpu().numpy().astype(float), grad_ret
def perturb(self, x, y, target_y=None):
    with ctx_noparamgrad_and_eval(self.predict):
        if self.pgdadv.targeted:
            self.target_y = target_y
            xadv = self.pgdadv.perturb(x, target_y)
            adv_pred = self.pgdadv.predict(xadv).argmax(1)
            # print((adv_pred == target_y).float().mean())
        else:
            xadv = self.pgdadv.perturb(x, y)
        # print(self.pgdadv.eps, x.shape, xadv.shape,
        #       torch.norm((x - xadv).view(x.shape[0], -1),
        #                  p=float('inf'), dim=1).mean())
        unitptb, curr_eps = self._get_unitptb_and_eps(
            xadv, x, y, self.pgdadv.eps)
        xadv = clamp(x + batch_multiply(curr_eps, unitptb),
                     min=self.pgdadv.clip_min, max=self.pgdadv.clip_max)
        # print('')
    return xadv
def whitebox_pgd(args, image, target, model, normalize=None):
    adversary = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
        nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0,
        clip_max=1.0, targeted=False)
    adv_image = adversary.perturb(image, target)
    print("Target is %d" % (target))
    pred = model(adv_image)
    out = pred.max(1, keepdim=True)[1]  # get the index of the max log-probability
    print("Adv Target is %d" % (out))
    clean_image = (image)[0].detach()
    adv_image = adv_image[0].detach()
    if args.comet:
        plot_image_to_comet(args, clean_image, "clean.png")
        plot_image_to_comet(args, adv_image, "Adv.png")
    return pred, clamp(clean_image - adv_image, 0., 1.)
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)
    shape, flat_x = _flatten(x)
    data_shape = tuple(shape[1:])

    def f(x):
        new_shape = (x.shape[0],) + data_shape
        input = x.reshape(new_shape)
        return self.predict(input)

    f_nes = NESWrapper(
        f, nb_samples=self.nb_samples, fd_eta=self.fd_eta)

    delta = torch.zeros_like(flat_x)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(delta, flat_x, self.ord, self.eps, self.clip_min,
                        self.clip_max)
        delta.data = clamp(flat_x + delta.data, min=self.clip_min,
                           max=self.clip_max) - flat_x

    rval = perturb_iterative(
        flat_x, y, f_nes, nb_iter=self.nb_iter,
        eps=self.eps, eps_iter=self.eps_iter,
        loss_fn=self.loss_fn, minimize=self.targeted,
        ord=self.ord, clip_min=self.clip_min,
        clip_max=self.clip_max, delta_init=delta,
        l1_sparsity=None)

    return rval.data.reshape(shape)
def single_white_box_generator(args, image, target, model, G):
    epsilon = 0.5
    # Create noise vector
    x = image
    opt = optim.SGD(G.parameters(), lr=1e-2)
    print("Target is %d" % (target))
    for t in range(args.PGD_steps):
        delta, kl_div = G(x)
        delta = delta.view(delta.size(0), 1, 28, 28)
        delta.data.clamp_(-epsilon, epsilon)
        delta.data = clamp(x.data + delta.data, 0., 1.) - x.data
        pred = model(x.detach() + delta)
        out = pred.max(1, keepdim=True)[1]  # get the index of the max log-probability
        loss = -nn.CrossEntropyLoss(reduction="sum")(pred, target)
        if args.comet:
            args.experiment.log_metric("Whitebox CE loss", loss, step=t)
        if t % 5 == 0:
            print(t, out[0][0], loss.item())
        opt.zero_grad()
        loss.backward()
        for p in G.parameters():
            p.grad.data.sign_()
        # Clipping is equivalent to projecting back onto the l_\infty ball
        # This technique is known as projected gradient descent (PGD)
        # delta.data.clamp_(-epsilon, epsilon)
        # delta.data = clamp(x.data + delta.data, 0., 1.) - x.data
        opt.step()
        if out != target:
            print(t, out[0][0], loss.item())
            break
    if args.comet:
        if not args.mnist:
            clean_image = (image)[0].detach().cpu().numpy().transpose(1, 2, 0)
            adv_image = (x + delta)[0].detach().cpu().numpy().transpose(1, 2, 0)
            delta_image = (delta)[0].detach().cpu().numpy().transpose(1, 2, 0)
        else:
            clean_image = (image)[0].detach()
            adv_image = (x + delta)[0].detach()
            delta_image = (delta)[0].detach()
        plot_image_to_comet(args, clean_image, "clean.png")
        plot_image_to_comet(args, adv_image, "Adv.png")
        plot_image_to_comet(args, delta_image, "delta.png")
    return out, delta
def _rescale_x_score(self, predict, x, y, ori, best_dist):
    x = torch.stack(x)
    x = self._revert_rescale(x)

    batch_logits = predict(x)
    scores = nn.Softmax(dim=1)(batch_logits)[:, y]

    if not self.comply_with_foolbox:
        x = clamp(x, self.clip_min, self.clip_max)
        batch_logits = predict(x)

    _, bests = torch.max(batch_logits, dim=1)
    best_img = None
    for ii in range(len(bests)):
        curr_dist = torch.sum((x[ii] - ori) ** 2)
        if (is_successful(int(bests[ii]), y, self.targeted) and
                curr_dist < best_dist):
            best_img = x[ii]
            best_dist = curr_dist
    scores = nn.Softmax(dim=1)(batch_logits)[:, y]
    return scores, best_img, best_dist
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)

    delta = torch.zeros_like(x)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(
            delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
        delta.data = clamp(
            x + delta.data, min=self.clip_min, max=self.clip_max) - x

    rval, delta = masked_perturb_iterative(
        x, y, self.predict, nb_iter=self.nb_iter,
        eps=self.eps, eps_iter=self.eps_iter,
        loss_fn=self.loss_fn, minimize=self.targeted,
        ord=self.ord, clip_min=self.clip_min,
        clip_max=self.clip_max, delta_init=delta,
        l1_sparsity=self.l1_sparsity, mask_steps=self.mask_steps,
        device=self.device)

    # save the final perturbation to disk
    file_name = self.experiment_name + "_iter" + str(self.nb_iter) + "_delta"
    with open(file_name + '.npy', 'wb') as f:
        np.save(f, delta.detach().cpu().numpy())

    return rval.data
def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
    unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
    adv_logits = self.predict(xadv)
    logit_margin = elementwise_margin(adv_logits, y)
    ones = torch.ones_like(y).float()
    # maxeps = self.maxeps * ones
    maxeps = torch.norm((xadv - x).view(x.shape[0], -1),
                        p=self.pgdadv.ord, dim=1)
    adv_pred = adv_logits.argmax(1)
    # print(1 - (adv_pred == y).float().mean())
    # print(maxeps.min(), maxeps.max())
    pred = adv_pred.clone()
    i = 0
    # print(i, self.pgdadv.eps, float((adv_pred == pred).float().mean()),
    #       float((pred == self.target_y).float().mean()),
    #       float(maxeps.min()), float(maxeps.max()))
    while i < 10:
        if self.pgdadv.targeted:
            unsuccessful_adv_idx = (adv_pred != self.target_y) & \
                                   (pred != self.target_y)
            if not unsuccessful_adv_idx.any():
                break
        else:
            unsuccessful_adv_idx = (adv_pred == y) & (pred == y)
        maxeps[unsuccessful_adv_idx] *= 1.5
        maxeps_ = maxeps[unsuccessful_adv_idx]
        unitptb_ = unitptb[unsuccessful_adv_idx]
        x_ = x[unsuccessful_adv_idx]
        x_ = clamp(x_ + batch_multiply(maxeps_, unitptb_), min=0., max=1.)
        pred[unsuccessful_adv_idx] = self.predict(x_).argmax(1)
        i += 1
        # print(i, self.pgdadv.eps, float((adv_pred == pred).float().mean()),
        #       float((pred == self.target_y).float().mean()),
        #       float(maxeps.min()), float(maxeps.max()))
    # print(logit_margin)
    curr_eps = bisection_search(
        maxeps * 0.5, unitptb, self.predict, x, y, elementwise_margin,
        logit_margin, maxeps, self.num_search_steps)
    if self.pgdadv.targeted:
        curr_eps[pred != self.target_y] = np.inf
    return unitptb, curr_eps
def bisection_search(cur_eps, ptb, model, data, label, fn_margin, margin_init,
                     maxeps, num_steps, cur_min=None, clip_min=0.,
                     clip_max=1.):
    assert torch.all(cur_eps <= maxeps)

    margin = margin_init
    if cur_min is None:
        cur_min = torch.zeros_like(margin)
    cur_max = maxeps.clone().detach()

    for ii in range(num_steps):
        cur_min = torch.max((margin < 0).float() * cur_eps, cur_min)
        cur_max = torch.min(
            ((margin < 0).float() * maxeps + (margin >= 0).float() * cur_eps),
            cur_max)
        cur_eps = (cur_min + cur_max) / 2
        margin = fn_margin(
            model(clamp(data + batch_multiply(cur_eps, ptb),
                        min=clip_min, max=clip_max)),
            label)

    assert torch.all(cur_eps <= maxeps)
    return cur_eps
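# Illustrative usage sketch (assumption, not from the source): shrinking a
# known-successful perturbation direction with bisection_search, looking for
# the smallest eps that keeps the margin negative (i.e. still misclassified).
# `elementwise_margin` is assumed to be the same helper used by
# _get_unitptb_and_eps above.
def _example_bisection(model, x, y, unitptb, maxeps, num_steps=10):
    cur_eps = maxeps * 0.5
    margin_init = elementwise_margin(
        model(clamp(x + batch_multiply(cur_eps, unitptb), min=0., max=1.)), y)
    return bisection_search(cur_eps, unitptb, model, x, y,
                            elementwise_margin, margin_init, maxeps, num_steps)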
def perturb_iterative(xvar, yvar, predict1, predict2, nb_iter, eps, eps_iter,
                      loss_fn, delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0, l1_sparsity=None):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict1: forward pass function.
    :param predict2: (optional) second forward pass function applied on top of
        predict1's output; if None, only predict1 is used.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
          - if None, then perform regular L1 projection.
          - if float value, then perform sparse L1 descent from
            Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        if predict2 is not None:
            outputs = predict2(predict1(xvar + delta))
        else:
            outputs = predict1(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)

        elif ord == 1:
            grad = delta.grad.data
            abs_grad = torch.abs(grad)

            batch_size = grad.size(0)
            view = abs_grad.view(batch_size, -1)
            view_size = view.size(1)
            if l1_sparsity is None:
                vals, idx = view.topk(1)
            else:
                vals, idx = view.topk(
                    int(np.round((1 - l1_sparsity) * view_size)))

            out = torch.zeros_like(view).scatter_(1, idx, vals)
            out = out.view_as(grad)
            grad = grad.sign() * (out > 0).float()
            grad = normalize_by_pnorm(grad, p=1)
            delta.data = delta.data + batch_multiply(eps_iter, grad)

            delta.data = batch_l1_proj(delta.data.cpu(), eps)
            if xvar.is_cuda:
                delta.data = delta.data.cuda()
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
def _get_arctanh_x(self, x):
    # normalize x from [clip_min, clip_max] to [0, 1] and clamp to that
    # normalized range before mapping into tanh-space
    result = clamp((x - self.clip_min) / (self.clip_max - self.clip_min),
                   min=0., max=1.) * 2 - 1
    return torch_arctanh(result * ONE_MINUS_EPS)