# Imports assumed by this section; project-local helpers (`util`, `fgs`,
# `deepfool_single`, `minimize_tv`, `Crop`, `_MahalanobisEnsembleLoss`) are
# expected to be defined or imported elsewhere in the module.
import torch
import torchvision.transforms as trans


def ifgs(model, input, target, max_iter=10, step_size=0.01, train_mode=False,
         mode=None, verbose=True):
    # Iterative fast gradient sign: accumulate per-step FGS perturbations.
    if train_mode:
        model.train()
    else:
        model.eval()
    pred = util.get_labels(model, input)
    corr = pred.eq(target)
    r = torch.zeros(input.size())
    for _ in range(max_iter):
        _, ri = fgs(model, input, target, step_size, train_mode, mode,
                    verbose=verbose)
        r = r + ri
        input = input + ri
    # `input` already carries the accumulated perturbation r at this point.
    pred_xp = util.get_labels(model, input)
    status = torch.zeros(input.size(0)).long()
    status[corr] = 2 * pred[corr].ne(pred_xp[corr]).long() - 1
    return (status, r)
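
def _example_ifgs():
    # A minimal, hypothetical usage sketch for ifgs(); the torchvision model,
    # input shapes, and hyper-parameters below are illustrative assumptions,
    # not part of the attack itself.
    import torchvision.models as models
    model = models.resnet18(pretrained=True).eval()
    x = torch.rand(4, 3, 224, 224)        # batch of images in [0, 1]
    y = util.get_labels(model, x)         # attack the model's own predictions
    status, r = ifgs(model, x, y, max_iter=10, step_size=0.01)
    # status: +1 = originally correct and now fooled, -1 = originally correct
    # but not fooled, 0 = misclassified before the attack; r is the additive
    # perturbation.
    return x + r, status
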
def universal(model, input, target, n_classes, max_val=0.1, train_mode=False,
              max_iter=10, step_size=0.1, batch_size=25, data_dir=None,
              r=None, verbose=True):
    pred = util.get_labels(model, input, batch_size)
    if r is None:
        r = torch.zeros(input[0].size())
    perm = torch.randperm(input.size(0))
    for i in range(input.size(0)):
        idx = perm[i]
        if verbose:
            print('sample %d: index %d' % (i + 1, idx))
        x_adv = torch.autograd.Variable(input[idx] + r)
        x_adv = x_adv.expand(1, input.size(1), input.size(2), input.size(3))
        output = model.forward(x_adv)
        _, pred_adv = output.max(1)
        pred_adv = pred_adv.data.cpu()[0][0]
        if pred[idx] == pred_adv:
            succ, ri = deepfool_single(model, input[idx] + r, pred[idx],
                                       n_classes, train_mode, max_iter,
                                       step_size, batch_size, data_dir)
            if succ:
                r = (r + ri).clamp(-max_val, max_val)
    x = input + r.expand_as(input)
    pred_xp = util.get_labels(model, x)
    status = 2 * pred_xp.ne(target).long() - 1
    status[pred.ne(target)] = 0
    return (status, r)
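
def _example_universal():
    # Hypothetical usage sketch for universal(): a single perturbation r,
    # clamped to [-max_val, max_val], is grown over the whole batch and then
    # applied to every image. The model, shapes, and budgets are illustrative
    # assumptions.
    import torchvision.models as models
    model = models.resnet18(pretrained=True).eval()
    x = torch.rand(8, 3, 224, 224)
    y = util.get_labels(model, x)
    status, r = universal(model, x, y, n_classes=1000, max_val=0.1,
                          max_iter=10, step_size=0.1, batch_size=8)
    x_adv = x + r.expand_as(x)            # the same r perturbs every image
    return x_adv, status
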
def deepfool(model, input, target, n_classes, train_mode=False, max_iter=5,
             step_size=0.1, batch_size=25, labels=None):
    pred = util.get_labels(model, input, batch_size)
    status = torch.zeros(input.size(0)).long()
    r = torch.zeros(input.size())
    for i in range(input.size(0)):
        status[i], r[i] = deepfool_single(model, input[i], target[i],
                                          n_classes, train_mode, max_iter,
                                          step_size, batch_size, labels)
    status = 2 * status - 1
    status[pred.ne(target)] = 0
    return (status, r)
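
def _example_deepfool():
    # Hypothetical usage sketch for deepfool(): per-sample minimal
    # perturbations computed via deepfool_single(). The model, shapes, and
    # n_classes are illustrative assumptions.
    import torchvision.models as models
    model = models.resnet18(pretrained=True).eval()
    x = torch.rand(4, 3, 224, 224)
    y = util.get_labels(model, x)
    status, r = deepfool(model, x, y, n_classes=1000, max_iter=5,
                         step_size=0.1, batch_size=4)
    return x + r, status
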
def attacking_mahalanobis_all(model, input, target, weight, loss_str,
                              sample_mean, inv_sample_conv, ensemble_weight,
                              bound=0, max_iter=100, step_size=0.01, kappa=0,
                              p=2, drop_rate=0.0, train_mode=False,
                              verbose=False):
    is_gpu = next(model.parameters()).is_cuda
    if train_mode:
        model.train()
    else:
        model.eval()
    pred = util.get_labels(model, input)
    corr = pred.eq(target)
    w = torch.autograd.Variable(input, requires_grad=True)
    best_w = torch.Tensor(input.size())
    best_w.copy_(input)
    best_loss = float('inf')
    optimizer = torch.optim.Adam([w], lr=step_size)
    input_var = torch.autograd.Variable(input)
    input_vec = input.view(input.size(0), -1)
    probs = util.get_probs(model, input)
    _, top2 = probs.topk(2, 1)
    argmax = top2[:, 0]
    top2_index = top2[:, 1]
    # If the top prediction equals the target label, attack the runner-up class.
    for j in range(top2.size(0)):
        if argmax[j] == target[j]:
            argmax[j] = top2[j, 1]
    mahalanobis_criterion = _MahalanobisEnsembleLoss().cuda()
    print('attack all')
    for i in range(max_iter):
        if i > 0:
            w.grad.data.fill_(0)
        model.zero_grad()

        # Norm constraints
        if loss_str == 'l2':
            loss = torch.pow(w - input_var, 2).sum()
        elif loss_str == 'linf':
            loss = torch.clamp((w - input_var).abs() - bound, min=0).sum()
        else:
            raise ValueError('Unsupported loss: %s' % loss_str)
        recons_loss = loss.data[0]

        # Adversarial loss
        w_data = w.data
        w_in = torch.autograd.Variable(w_data, requires_grad=True)
        output, out_features = model.feature_list(w_in)
        # Reduce each intermediate feature map to a per-channel mean.
        for j in range(len(out_features)):
            out_features[j] = out_features[j].view(out_features[j].size(0),
                                                   out_features[j].size(1), -1)
            out_features[j] = torch.mean(out_features[j], 2)
        for j in range(output.size(0)):
            loss += weight * torch.clamp(
                output[j][target[j]] - output[j][argmax[j]] + kappa, min=0)
        # Mahalanobis ensemble term (computed but not added to the objective
        # here).
        temp_loss = mahalanobis_criterion(out_features, sample_mean,
                                          inv_sample_conv, ensemble_weight,
                                          top2_index)
        adv_loss = loss.data[0] - recons_loss

        if is_gpu:
            loss = loss.cuda()
        loss.backward()
        w.grad.data.add_(w_in.grad.data)

        w_cpu = w.data.cpu().numpy()
        input_np = input.cpu().numpy()
        optimizer.step()
        total_loss = loss.data.cpu()[0]
        # w.data = utils.img_to_tensor(utils.transform_img(w.data), scale=False)

        output_vec = w.data
        preds = util.get_labels(model, output_vec)
        output_vec = output_vec.view(output_vec.size(0), -1)
        diff = (input_vec - output_vec).norm(2, 1).squeeze()
        diff = diff.div(input_vec.norm(2, 1).squeeze())
        rb = diff.mean()
        sr = float(preds.ne(target).sum()) / target.size(0)
        if verbose:
            print('iteration %d: loss = %f, %s_loss = %f, '
                  'adv_loss = %f' % (i + 1, total_loss, loss_str,
                                     recons_loss, adv_loss))
            print('robustness = %f, success rate = %f' % (rb, sr))
        if total_loss < best_loss:
            best_loss = total_loss
            best_w = w.data.clone()
    pred_xp = util.get_labels(model, best_w)
    status = torch.zeros(input.size(0)).long()
    status[corr] = 2 * pred[corr].ne(pred_xp[corr]).long() - 1
    return (status, best_w)
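
def _example_attacking_mahalanobis_all(model, x, y, sample_mean,
                                       inv_sample_conv, ensemble_weight):
    # Hypothetical usage sketch for attacking_mahalanobis_all(). It assumes
    # `model` exposes feature_list() and that sample_mean / inv_sample_conv
    # hold the per-layer class-conditional Gaussian statistics used by the
    # Mahalanobis criterion; the weight and iteration budget are illustrative
    # assumptions.
    status, x_adv = attacking_mahalanobis_all(
        model, x, y, weight=5.0, loss_str='l2',
        sample_mean=sample_mean, inv_sample_conv=inv_sample_conv,
        ensemble_weight=ensemble_weight, max_iter=100, step_size=0.01,
        verbose=True)
    return x_adv, status
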
def cw(model, input, target, weight, loss_str, bound=0, tv_weight=0,
       max_iter=100, step_size=0.01, kappa=0, p=2, crop_frac=1.0,
       drop_rate=0.0, train_mode=False, verbose=False):
    is_gpu = next(model.parameters()).is_cuda
    if train_mode:
        model.train()
    else:
        model.eval()
    pred = util.get_labels(model, input)
    corr = pred.eq(target)
    w = torch.autograd.Variable(input, requires_grad=True)
    best_w = torch.Tensor(input.size())
    best_w.copy_(input)
    best_loss = float('inf')
    optimizer = torch.optim.Adam([w], lr=step_size)
    input_var = torch.autograd.Variable(input)
    input_vec = input.view(input.size(0), -1)
    to_pil = trans.ToPILImage()
    scale_up = trans.Resize((w.size(2), w.size(3)))
    scale_down = trans.Resize(
        (int(crop_frac * w.size(2)), int(crop_frac * w.size(3))))
    to_tensor = trans.ToTensor()
    probs = util.get_probs(model, input)
    _, top2 = probs.topk(2, 1)
    argmax = top2[:, 0]
    # If the top prediction equals the target label, attack the runner-up class.
    for j in range(top2.size(0)):
        if argmax[j] == target[j]:
            argmax[j] = top2[j, 1]
    for i in range(max_iter):
        if i > 0:
            w.grad.data.fill_(0)
        model.zero_grad()

        # Norm constraint on the perturbation.
        if loss_str == 'l2':
            loss = torch.pow(w - input_var, 2).sum()
        elif loss_str == 'linf':
            loss = torch.clamp((w - input_var).abs() - bound, min=0).sum()
        else:
            raise ValueError('Unsupported loss: %s' % loss_str)
        recons_loss = loss.data.item()

        w_data = w.data
        if crop_frac < 1 and i % 3 == 1:
            # Every third iteration, compute the adversarial loss on a random
            # crop rescaled to the full resolution.
            w_cropped = torch.zeros(w.size(0), w.size(1),
                                    int(crop_frac * w.size(2)),
                                    int(crop_frac * w.size(3)))
            locs = torch.zeros(w.size(0), 4).long()
            w_in = torch.zeros(w.size())
            for m in range(w.size(0)):
                locs[m] = torch.LongTensor(
                    Crop('random', crop_frac)(w_data[m]))
                w_cropped = w_data[m, :,
                                   locs[m][0]:(locs[m][0] + locs[m][2]),
                                   locs[m][1]:(locs[m][1] + locs[m][3])]
                minimum = w_cropped.min()
                maximum = w_cropped.max() - minimum
                w_in[m] = to_tensor(
                    scale_up(to_pil((w_cropped - minimum) / maximum)))
                w_in[m] = w_in[m] * maximum + minimum
            w_in = torch.autograd.Variable(w_in, requires_grad=True)
        else:
            w_in = torch.autograd.Variable(w_data, requires_grad=True)
        if drop_rate == 0 and i % 3 == 2:
            output = model.forward(w_in)
        else:
            output = model.forward(torch.nn.Dropout(p=drop_rate).forward(w_in))
        # Adversarial (margin) loss.
        for j in range(output.size(0)):
            loss += weight * torch.clamp(
                output[j][target[j]] - output[j][argmax[j]] + kappa, min=0)
        adv_loss = loss.data.item() - recons_loss

        if is_gpu:
            loss = loss.cuda()
        loss.backward()

        if crop_frac < 1 and i % 3 == 1:
            # Map the gradient of the upscaled crop back into the full image.
            grad_full = torch.zeros(w.size())
            grad_cpu = w_in.grad.data
            for m in range(w.size(0)):
                minimum = grad_cpu[m].min()
                maximum = grad_cpu[m].max() - minimum
                grad_m = to_tensor(
                    scale_down(to_pil((grad_cpu[m] - minimum) / maximum)))
                grad_m = grad_m * maximum + minimum
                grad_full[m, :,
                          locs[m][0]:(locs[m][0] + locs[m][2]),
                          locs[m][1]:(locs[m][1] + locs[m][3])] = grad_m
            w.grad.data.add_(grad_full)
        else:
            w.grad.data.add_(w_in.grad.data)

        w_cpu = w.data.cpu().numpy()
        input_np = input.cpu().numpy()
        tv_loss = 0
        if tv_weight > 0:
            # Total-variation regularization on the perturbation.
            for j in range(output.size(0)):
                for k in range(3):
                    tv_loss += tv_weight * minimize_tv.tv(
                        w_cpu[j, k] - input_np[j, k], p)
                    grad = tv_weight * torch.from_numpy(
                        minimize_tv.tv_dx(w_cpu[j, k] - input_np[j, k], p))
                    w.grad.data[j, k].add_(grad.float())
        optimizer.step()
        total_loss = loss.data.cpu().item() + tv_loss
        # w.data = utils.img_to_tensor(utils.transform_img(w.data), scale=False)

        output_vec = w.data
        preds = util.get_labels(model, output_vec)
        output_vec = output_vec.view(output_vec.size(0), -1)
        diff = (input_vec - output_vec).norm(2, 1).squeeze()
        diff = diff.div(input_vec.norm(2, 1).squeeze())
        rb = diff.mean()
        sr = float(preds.ne(target).sum()) / target.size(0)
        if verbose:
            print('iteration %d: loss = %f, %s_loss = %f, '
                  'adv_loss = %f, tv_loss = %f'
                  % (i + 1, total_loss, loss_str, recons_loss, adv_loss,
                     tv_loss))
            print('robustness = %f, success rate = %f' % (rb, sr))
        if total_loss < best_loss:
            best_loss = total_loss
            best_w = w.data.clone()
    pred_xp = util.get_labels(model, best_w)
    status = torch.zeros(input.size(0)).long()
    status[corr] = 2 * pred[corr].ne(pred_xp[corr]).long() - 1
    return (status, best_w)
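
def _example_cw():
    # Hypothetical usage sketch for cw(): an L2, Carlini-Wagner-style attack
    # with an optional total-variation penalty and random-crop smoothing. The
    # model, loss weights, and crop fraction below are illustrative
    # assumptions.
    import torchvision.models as models
    model = models.resnet18(pretrained=True).eval()
    x = torch.rand(4, 3, 224, 224)
    y = util.get_labels(model, x)
    status, x_adv = cw(model, x, y, weight=5.0, loss_str='l2', tv_weight=1e-4,
                       max_iter=100, step_size=0.01, kappa=0, crop_frac=0.9)
    return x_adv, status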