import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Module
from typing import List, Optional, Union

# Helper utilities (zero_grad, toggle_requires_grad, toggle_eval, clamp,
# project, softmax_reciprocal, disable_batchnorm_tracking) are assumed to be
# defined elsewhere in this repo. Each train_step variant below presumably
# lives in its own experiment module; they are collected here side by side.


def train_step(
        X, y, model, optimizer, scheduler,           # generic
        reg_weight, epsilon, criterion, fim_reg      # experiment-specific
) -> tuple:
    """Penalize the Frobenius norm of the Fisher Information Matrix (FIM)."""
    model.train()
    model.apply(zero_grad)

    with toggle_requires_grad(X, True):
        # clean loss
        output = model(X)
        loss = criterion(output, y)
        loss.backward(retain_graph=True)

        # Jacobian regularizer
        fim_weights = softmax_reciprocal(logits=output)
        loss_fim = fim_reg(X, output, epsilon=epsilon, weights=fim_weights)
        loss_fim *= reg_weight

    # update model
    loss_fim.backward()
    optimizer.step()
    scheduler.step()

    return loss, output
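
# Usage sketch (illustrative only): one training step with the FIM penalty.
# `loader`, `fim_reg`, and the hyperparameter values are assumptions, not
# part of this repo.
#
#   criterion = nn.CrossEntropyLoss()
#   for X, y in loader:
#       loss, output = train_step(X, y, model, optimizer, scheduler,
#                                 reg_weight=0.01, epsilon=1e-2,
#                                 criterion=criterion, fim_reg=fim_reg)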


def train_step(
        X, y, model, optimizer, scheduler,                    # generic
        reg_weight, geometry, epsilon, alpha, lower_limit,
        upper_limit, attack_iters, criterion                  # experiment-specific
) -> tuple:
    """TRADES algorithm [https://arxiv.org/abs/1901.08573]."""
    ### adversarial perturbation
    # init perturbation
    delta = torch.zeros_like(X)
    for i in range(len(epsilon)):
        delta[:, i, :, :].uniform_(-epsilon[i][0][0].item(), epsilon[i][0][0].item())
    delta.data = clamp(delta, lower_limit - X, upper_limit - X)

    # find an approximately optimal perturbation
    criterion_kl = nn.KLDivLoss(reduction='sum')
    with toggle_eval(model):  # turn off batchnorm statistics tracking
        for _ in range(attack_iters):
            with toggle_requires_grad(delta, True):
                grad = torch.autograd.grad(
                    outputs=criterion_kl(F.log_softmax(model(X + delta), dim=1),
                                         F.softmax(model(X), dim=1)),
                    inputs=delta,
                    only_inputs=True)[0]
            delta = project(delta + alpha * grad.sign(), epsilon, geometry=geometry)
            delta = clamp(delta, lower_limit - X, upper_limit - X)

    ### adversarial loss
    model.apply(zero_grad)
    # disable batchnorm statistics tracking to avoid distribution shift
    # from adversarial examples
    with disable_batchnorm_tracking(model):
        loss = criterion_kl(F.log_softmax(model(X + delta), dim=1),
                            F.softmax(model(X), dim=1))
    loss *= (reg_weight / X.shape[0])
    loss.backward()

    ### clean loss
    # batchnorm statistics tracking is fine here
    output = model(X)
    loss = criterion(output, y)

    ### update model
    loss.backward()
    optimizer.step()
    scheduler.step()

    return loss, output
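
# For reference, this step optimizes the TRADES objective
#
#   min_f E[ CE(f(X), y) + beta * max_{X' in B(X, eps)} KL(f(X) || f(X')) ]
#
# where `reg_weight` plays the role of beta (1/lambda in the paper) and the
# sum-reduced KL divided by X.shape[0] implements the batch mean. Usage sketch
# (values are illustrative assumptions; the paper suggests beta around 6):
#
#   loss, output = train_step(X, y, model, optimizer, scheduler,
#                             reg_weight=6.0, geometry='linf', epsilon=eps,
#                             alpha=eps / 4, lower_limit=lo, upper_limit=hi,
#                             attack_iters=10, criterion=nn.CrossEntropyLoss())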


def train_step(
        X, y, model, optimizer, scheduler,                     # generic
        epsilon, alpha, lower_limit, upper_limit, criterion,   # experiment-specific
) -> tuple:
    """FGSM with the configuration from Fast is Better than Free
    [https://arxiv.org/abs/2001.03994].
    """
    ### adversarial perturbation
    # init
    delta = torch.zeros_like(X)
    for j in range(len(epsilon)):
        delta[:, j, :, :].uniform_(-epsilon[j][0][0].item(), epsilon[j][0][0].item())
    delta.data = clamp(delta, lower_limit - X, upper_limit - X)

    # perturb
    with toggle_eval(model), toggle_requires_grad(delta, True):
        # turn off batchnorm statistics updating
        grad = torch.autograd.grad(outputs=criterion(model(X + delta), y),
                                   inputs=delta,
                                   only_inputs=True)[0]
    delta = project(delta + alpha * torch.sign(grad), epsilon, geometry='linf')
    delta = clamp(delta, lower_limit - X, upper_limit - X)

    ### adversarial loss
    output = model(X + delta)
    loss = criterion(output, y)

    ### update model
    model.apply(zero_grad)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # return clean predictions
    with toggle_eval(model), torch.no_grad():
        logits = model(X)
        loss = criterion(logits, y)

    return loss, logits
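
# Usage sketch with the data setup this function expects: `epsilon` is indexed
# as epsilon[j][0][0], i.e. a per-channel tensor of shape (C, 1, 1) in the
# normalized input space. The mean/std names and the alpha = 1.25 * eps step
# size follow the Fast-is-Better-than-Free setup, but are assumptions here.
#
#   eps = (8. / 255.) / std            # std: per-channel, shape (3, 1, 1)
#   alpha = 1.25 * eps
#   lo, hi = (0. - mean) / std, (1. - mean) / std
#   loss, logits = train_step(X, y, model, optimizer, scheduler,
#                             epsilon=eps, alpha=alpha,
#                             lower_limit=lo, upper_limit=hi,
#                             criterion=nn.CrossEntropyLoss())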


def train_step(
        X, y, model, optimizer, scheduler,                    # generic
        geometry, epsilon, alpha, lower_limit, upper_limit,
        attack_iters, criterion                               # experiment-specific
) -> tuple:
    """Madry PGD algorithm [https://arxiv.org/abs/1706.06083]."""
    ### adversarial perturbation
    # init
    delta = torch.zeros_like(X)
    for i in range(len(epsilon)):
        delta[:, i, :, :].uniform_(-epsilon[i][0][0].item(), epsilon[i][0][0].item())
    delta.data = clamp(delta, lower_limit - X, upper_limit - X)

    # perturb
    with toggle_eval(model):  # turn off batchnorm statistics updating
        for _ in range(attack_iters):
            with toggle_requires_grad(delta, True):
                grad = torch.autograd.grad(outputs=criterion(model(X + delta), y),
                                           inputs=delta,
                                           only_inputs=True)[0]
            delta = project(delta + alpha * grad.sign(), epsilon, geometry=geometry)
            delta = clamp(delta, lower_limit - X, upper_limit - X)

    ### compute loss
    output = model(X + delta)
    loss = criterion(output, y)

    ### update model
    model.apply(zero_grad)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # return clean predictions
    with toggle_eval(model), torch.no_grad():
        logits = model(X)
        loss = criterion(logits, y)

    return loss, logits
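
# Usage sketch (illustrative values; `eps`, `lo`, `hi` as in the FGSM example
# above). Madry-style training commonly runs a handful of PGD steps per batch,
# e.g. 7 iterations with alpha = eps / 4:
#
#   loss, logits = train_step(X, y, model, optimizer, scheduler,
#                             geometry='linf', epsilon=eps, alpha=eps / 4,
#                             lower_limit=lo, upper_limit=hi,
#                             attack_iters=7, criterion=nn.CrossEntropyLoss())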


def train_step(
        X, y, model, optimizer, scheduler,     # generic
        reg_weight, epsilon, criterion         # experiment-specific
) -> tuple:
    """Input gradient regularization in the dual norm."""
    model.train()
    model.apply(zero_grad)

    # compute the classification loss first to avoid two forward passes
    with toggle_requires_grad(X, True):
        output = model(X)
        loss = criterion(output, y)

        # compute gradient penalty
        grad = torch.autograd.grad(loss, X, create_graph=True, only_inputs=True)[0]
        grad *= epsilon                                            # adversarial weight (note: tensor-valued)
        loss_grad = grad.flatten(1).norm(p=1, dim=1).mean(dim=0)   # l1 is the dual norm of linf
        loss_grad *= reg_weight                                    # additional regularization weight

    # update model
    loss.backward(retain_graph=True)
    loss_grad.backward()
    optimizer.step()
    scheduler.step()

    return loss, output
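
# Why the l1 norm above: to first order, the worst-case loss increase under a
# perturbation delta with ||delta||_inf <= eps is
#
#   max_{||delta||_inf <= eps} <grad_x loss, delta> = eps * ||grad_x loss||_1,
#
# since l1 is the dual norm of l_inf. With a tensor-valued (per-channel)
# epsilon, `loss_grad` computes the correspondingly weighted l1 norm.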


def inversion(
        model: Module,
        x0: Tensor,
        *,
        stepsize: float,
        category: Union[int, Tensor],
        untargeted: bool = False,
        max_iters: int = 10000,
        clamp: Optional[torch.Tensor] = None,
        keep_path: bool = False,
        geometry: str = 'linf',
) -> Union[List[Tensor], Tensor]:
    """Runs adversarial perturbation; both targeted and untargeted versions
    are available. No constraints are assumed by default, but box constraints
    can be added via the `clamp` argument, i.e. the projection operator is
    only defined for l_inf (box) constraints.

    :param model      : PyTorch module.
    :param x0         : Initialization point.
    :param stepsize   : The stepsize of each gradient step.
    :param category   : The target label for the cross-entropy loss.
    :param untargeted : False - targeted perturbation; True - untargeted
                        perturbation.
    :param max_iters  : Number of iterations. Currently there is no convergence
                        criterion.
    :param clamp      : A tensor specifying the range of admissible pixel values,
                        one (min, max) pair per color channel.
    :param keep_path  : Flag to retain the perturbation iterates. Useful for
                        visualization.
    :param geometry   : ('l2' or 'linf') Under which geometry to normalize the
                        gradient directions.
    """
    device = next(model.parameters()).device
    model.eval()

    batch_size = x0.shape[0]
    if isinstance(category, int):
        category = torch.LongTensor([category]).repeat(batch_size).to(device)
    else:
        category = category.to(device)

    x_inv = x0.clone().to(device)

    if clamp is not None:
        assert len(clamp) == x0.shape[1]
        assert len(clamp[0]) == 2

    if keep_path:
        path = []

    # targeted: descend the loss toward `category`; untargeted: ascend it away
    direction = 1. if untargeted else -1.

    for _ in range(max_iters):
        with toggle_requires_grad(model, False), toggle_requires_grad(x_inv, True):
            grad = torch.autograd.grad(
                outputs=F.cross_entropy(model(x_inv), category),
                inputs=x_inv,
                only_inputs=True)[0]

        if geometry == 'l2':
            norm = grad.flatten(1).norm(dim=1)[:, None, None, None]
            x_inv += direction * stepsize * grad / (norm + 1e-10)
        elif geometry == 'linf':
            x_inv += direction * stepsize * grad.sign()
        else:
            raise NotImplementedError

        if clamp is not None:
            # clamp each channel to its proper range
            for i in range(len(clamp)):
                x_inv[:, i] = torch.clamp(x_inv[:, i], *clamp[i])

        if keep_path:
            path.append(x_inv.clone().cpu())

    if keep_path:
        x_inv = path

    return x_inv
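
# Usage sketch (illustrative only -- the batch `x0` and the target class are
# assumptions). The clamp tensor supplies one (min, max) pair per channel:
#
#   x_adv = inversion(model, x0,
#                     stepsize=1e-2, category=3,
#                     untargeted=False, max_iters=500,
#                     clamp=torch.tensor([[0., 1.]] * x0.shape[1]),
#                     keep_path=False, geometry='linf')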