device = 'cuda' if torch.cuda.is_available() else 'cpu' start_epoch = 0 model_path = '{}/train_pytorch_model/adversarial_train/feature_scatter/{}@{}@epoch_{}@batch_{}.pth.tar'.format( PY_ROOT, args.dataset, args.arch, args.max_epoch, args.batch_size_train) print("model will be saved to {}".format(model_path)) os.makedirs(os.path.dirname(model_path), exist_ok=True) # Data print('==> Preparing data..') train_loader = DataLoaderMaker.get_img_label_data_loader( args.dataset, args.batch_size_train, True) print('==> Building model..') basic_net = StandardModel(args.dataset, args.arch, no_grad=False).train().cuda() basic_net.apply(initialize_weights) def print_para(net): for name, param in net.named_parameters(): if param.requires_grad: print(name) print(param.data) break # config for feature scatter config_feature_scatter = { 'train': True, 'epsilon': 8.0 / 255 * 2, 'num_steps': 1,
def _build_lr_schedule(name, total_epochs, lr_max):
    """Return a callable mapping a (fractional) epoch ``t`` to a learning rate.

    Args:
        name: ``'cyclic'`` or ``'piecewise'``.
        total_epochs: the nominal number of epochs the schedule spans.
        lr_max: the peak learning rate.

    Returns:
        A one-argument function ``t -> lr``.

    Raises:
        ValueError: if ``name`` is not a supported schedule.
    """
    if name == 'cyclic':
        # Linear ramp 0 -> lr_max over the first 2/5 of training, then
        # linear decay back to 0 at ``total_epochs``.
        knots = [0, total_epochs * 2 // 5, total_epochs]
        values = [0, lr_max, 0]
        return lambda t: np.interp([t], knots, values)[0]

    if name == 'piecewise':
        def piecewise(t):
            progress = t / total_epochs
            if progress < 0.5:
                return lr_max
            if progress < 0.75:
                return lr_max / 10.
            return lr_max / 100.
        return piecewise

    raise ValueError(
        "unsupported lr_schedule {!r}; expected 'cyclic' or 'piecewise'".format(name))


def main():
    """Adversarially train a model (pgd / fgsm / free / none) and save its weights.

    Reads every setting from ``get_args()``, builds the data loader, model and
    SGD optimizer, runs the training loop selected by ``args.attack`` and
    finally writes a ``state_dict`` to ``model_path``.

    When ``args.overfit_check`` is set, the first minibatch of each epoch is
    re-attacked with PGD after the epoch; training stops early if the robust
    accuracy on it drops by more than 0.5 relative to the previous epoch, and
    the weights from the last epoch that passed the check are saved. Without
    the check, the final weights are saved.

    Raises:
        ValueError: for an unsupported ``args.attack``, ``args.fgsm_init`` or
            ``args.lr_schedule``, or when ``args.epochs`` is not a multiple of
            ``args.attack_iters`` in 'free' mode.
    """
    args = get_args()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    logger.info(args)

    # Fail fast on settings that would otherwise surface as a NameError deep
    # inside the training loop.
    if args.attack not in ('pgd', 'fgsm', 'free', 'none'):
        raise ValueError("unsupported attack {!r}".format(args.attack))
    if args.attack == 'fgsm' and args.fgsm_init not in ('zero', 'random', 'previous'):
        raise ValueError("unsupported fgsm_init {!r}".format(args.fgsm_init))

    model_path = '{}/train_pytorch_model/adversarial_train/fast_adv_train/{}@{}@epoch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.epochs)
    out_dir = os.path.dirname(model_path)
    os.makedirs(out_dir, exist_ok=True)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    start_start_time = time.time()
    train_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, True)
    # NOTE(review): test_loader is not used anywhere in this function; it is
    # kept because constructing it may have side effects not visible here.
    test_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)

    # Perturbation budget and PGD step size, rescaled by the module-level
    # ``std`` (presumably the per-channel normalisation std -- confirm).
    epsilon = (args.epsilon / 255.) / std
    pgd_alpha = (args.pgd_alpha / 255.) / std

    model = StandardModel(args.dataset, args.arch, no_grad=False)
    model.apply(initialize_weights)
    model.cuda()
    model.train()

    opt = torch.optim.SGD(model.parameters(), lr=args.lr_max, momentum=0.9, weight_decay=5e-4)
    model, opt = amp.initialize(model, opt, opt_level="O2", loss_scale=1.0, master_weights=False)
    criterion = nn.CrossEntropyLoss()

    # 'free' training and FGSM with 'previous' init carry the perturbation
    # across minibatches, so it is allocated once up front.
    # NOTE(review): the 3x32x32 shape is hard-coded; confirm it matches every
    # dataset this script is run on.
    if args.attack == 'free' or (args.attack == 'fgsm' and args.fgsm_init == 'previous'):
        delta = torch.zeros(args.batch_size, 3, 32, 32).cuda()
        delta.requires_grad = True

    if args.attack == 'free':
        # Each minibatch is replayed ``attack_iters`` times, so fewer passes
        # over the data are made. Explicit raise instead of ``assert`` so the
        # check survives ``python -O``.
        if args.epochs % args.attack_iters != 0:
            raise ValueError(
                "epochs ({}) must be a multiple of attack_iters ({}) for 'free' training".format(
                    args.epochs, args.attack_iters))
        epochs = int(math.ceil(args.epochs / args.attack_iters))
    else:
        epochs = args.epochs

    lr_schedule = _build_lr_schedule(args.lr_schedule, args.epochs, args.lr_max)

    prev_robust_acc = 0.
    best_state_dict = None
    # Initialised so the final log line works even if no epoch is run.
    train_time = start_start_time

    logger.info('Epoch \t Time \t LR \t \t Train Loss \t Train Acc')
    for epoch in range(epochs):
        start_time = time.time()
        train_loss = 0
        train_acc = 0
        train_n = 0
        for i, (X, y) in enumerate(train_loader):
            X = X.cuda().float()
            y = y.cuda().long()
            if i == 0:
                # Remembered for the end-of-epoch overfit check.
                first_batch = (X, y)
            lr = lr_schedule(epoch + (i + 1) / len(train_loader))
            opt.param_groups[0].update(lr=lr)

            if args.attack == 'pgd':
                delta = attack_pgd(model, X, y, epsilon, pgd_alpha, args.attack_iters, args.restarts, opt)
            elif args.attack == 'fgsm':
                if args.fgsm_init == 'zero':
                    delta = torch.zeros_like(X, requires_grad=True)
                elif args.fgsm_init == 'random':
                    delta = torch.zeros_like(X).cuda()
                    # Each of the three channels is sampled within its own bound.
                    for channel in range(3):
                        bound = epsilon[channel][0][0].item()
                        delta[:, channel, :, :].uniform_(-bound, bound)
                    delta.requires_grad = True
                elif args.fgsm_init == 'previous':
                    delta.requires_grad = True
                output = model(X + delta[:X.size(0)])
                loss = F.cross_entropy(output, y)
                with amp.scale_loss(loss, opt) as scaled_loss:
                    scaled_loss.backward()
                grad = delta.grad.detach()
                # Single signed-gradient step, projected back into the eps-ball.
                delta.data = clamp(delta + args.fgsm_alpha * epsilon * torch.sign(grad), -epsilon, epsilon)
                delta = delta.detach()
            elif args.attack == 'free':
                delta.requires_grad = True
                for j in range(args.attack_iters):
                    epoch_iters = epoch * args.attack_iters + (i * args.attack_iters + j + 1) / len(train_loader)
                    lr = lr_schedule(epoch_iters)
                    opt.param_groups[0].update(lr=lr)
                    output = model(clamp(X + delta[:X.size(0)], lower_limit, upper_limit))
                    loss = F.cross_entropy(output, y)
                    opt.zero_grad()
                    with amp.scale_loss(loss, opt) as scaled_loss:
                        scaled_loss.backward()
                    # The same backward pass updates both the perturbation
                    # and the model weights.
                    grad = delta.grad.detach()
                    delta.data = clamp(delta + epsilon * torch.sign(grad), -epsilon, epsilon)
                    nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                    opt.step()
                    delta.grad.zero_()
            elif args.attack == 'none':
                delta = torch.zeros_like(X)

            output = model(clamp(X + delta[:X.size(0)], lower_limit, upper_limit))
            loss = criterion(output, y)
            if args.attack != 'free':
                # 'free' already stepped the optimizer inside its replay loop.
                opt.zero_grad()
                with amp.scale_loss(loss, opt) as scaled_loss:
                    scaled_loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                opt.step()

            train_loss += loss.item() * y.size(0)
            train_acc += (output.max(1)[1] == y).sum().item()
            train_n += y.size(0)

        if args.overfit_check:
            # Check current PGD robustness of model using random minibatch
            X, y = first_batch
            pgd_delta = attack_pgd(model, X, y, epsilon, pgd_alpha, args.attack_iters, args.restarts, opt)
            with torch.no_grad():
                output = model(clamp(X + pgd_delta[:X.size(0)], lower_limit, upper_limit))
            robust_acc = (output.max(1)[1] == y).sum().item() / y.size(0)
            if robust_acc - prev_robust_acc < -0.5:
                # Sudden collapse in robust accuracy: stop and keep the
                # snapshot from the previous epoch.
                break
            prev_robust_acc = robust_acc
            best_state_dict = copy.deepcopy(model.state_dict())

        train_time = time.time()
        logger.info('%d \t %.1f \t %.4f \t %.4f \t %.4f',
                    epoch, train_time - start_time, lr, train_loss/train_n, train_acc/train_n)

    if best_state_dict is None:
        # No overfit-check snapshot was taken; save the final weights.
        best_state_dict = model.state_dict()
    torch.save(best_state_dict, model_path)
    logger.info('Total time: %.4f', train_time - start_start_time)