def main():
    """Adversarially train a model with TRADES and periodically checkpoint it.

    Relies on module-level ``args``, ``train_loader``, ``test_loader``, ``PY_ROOT``
    and the helpers ``adjust_learning_rate`` / ``train`` / ``eval_train`` /
    ``eval_test`` defined elsewhere in this file.
    """
    model = StandardModel(args.dataset, args.arch, no_grad=False, load_pretrained=False)
    model.cuda()
    model.train()
    device = torch.device("cuda")
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)
    # Checkpoint path encodes dataset/architecture/epochs/batch size so runs
    # with different hyper-parameters never clobber each other.
    model_path = '{}/train_pytorch_model/adversarial_train/TRADES/{}@{}@epoch_{}@batch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.epochs, args.batch_size)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("After trained, the model will save to {}".format(model_path))
    for epoch in range(1, args.epochs + 1):
        # adjust learning rate for SGD
        adjust_learning_rate(optimizer, epoch)

        # adversarial training
        train(args, model, device, train_loader, optimizer, epoch)

        # evaluation on natural examples
        print('================================================================')
        eval_train(model, device, train_loader)
        eval_test(model, device, test_loader)
        print('================================================================')

        # save checkpoint
        if epoch % args.save_freq == 0:
            state = {
                'state_dict': model.state_dict(),
                'epoch': epoch,
                'optimizer': optimizer.state_dict()
            }
            # BUG FIX: the original called os.path.join(model_dir, model_path)
            # with an undefined ``model_dir`` (NameError on the first save), and
            # ``model_path`` is already the full destination whose directory was
            # created above — save straight to it.
            torch.save(state, model_path)
def onehot(ind, num_classes):
    """Return a length-``num_classes`` float32 one-hot vector with a 1 at ``ind``."""
    vector = np.zeros([num_classes])
    vector[ind] = 1
    return vector.astype(np.float32)

# --- module-level setup: runs at import time ---
logger.info("build dataloader")
# Training (shuffle=True) and validation (shuffle=False) loaders for the
# configured dataset — presumably the boolean flag selects train vs. eval
# split; TODO confirm against DataLoaderMaker.
train_loader = DataLoaderMaker.get_img_label_data_loader(
    args.dataset, args.batch_size, True)
val_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)
model = StandardModel(args.dataset, args.arch, no_grad=False)
model.cuda()
model.train()

def anneal_lr(epoch):
    """Piecewise-constant LR multiplier: 1.0 below epoch 100, 0.1 below 150, else 0.01."""
    if epoch < 100:
        return 1.
    elif epoch < 150:
        return 0.1
    else:
        return 0.01

# PGD attack configuration (eps/step in [0,1] pixel scale).
# NOTE(review): this dict literal is truncated in the visible chunk — the
# closing brace and any remaining keys lie outside this view.
pgd_kwargs = {
    "eps": 16. / 255.,
    "eps_iter": 4. / 255.,
    "nb_iter": 10,
def main():
    """Fast adversarial training (FGSM / PGD / 'free') with apex amp (O2).

    Trains ``args.arch`` on ``args.dataset`` with the attack selected by
    ``args.attack``, optionally early-stopping when PGD robustness on a fixed
    minibatch collapses (``args.overfit_check``), and saves the best state dict
    to a path derived from the hyper-parameters.  Relies on module-level
    helpers ``get_args``, ``DataLoaderMaker``, ``StandardModel``,
    ``initialize_weights``, ``attack_pgd``, ``clamp`` and the normalization
    constants ``std`` / ``lower_limit`` / ``upper_limit``.
    """
    args = get_args()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    logger.info(args)
    model_path = '{}/train_pytorch_model/adversarial_train/fast_adv_train/{}@{}@epoch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.epochs)
    out_dir = os.path.dirname(model_path)
    os.makedirs(out_dir, exist_ok=True)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    start_start_time = time.time()
    train_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, True)
    test_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)
    # Attack budgets are specified in pixel units and rescaled into the
    # normalized input space (per-channel division by ``std``).
    epsilon = (args.epsilon / 255.) / std
    pgd_alpha = (args.pgd_alpha / 255.) / std
    model = StandardModel(args.dataset, args.arch, no_grad=False)
    model.apply(initialize_weights)
    model.cuda()
    model.train()
    opt = torch.optim.SGD(model.parameters(), lr=args.lr_max, momentum=0.9, weight_decay=5e-4)
    model, opt = amp.initialize(model, opt, opt_level="O2", loss_scale=1.0, master_weights=False)
    criterion = nn.CrossEntropyLoss()
    # 'free' training and FGSM-with-previous-init both reuse one persistent
    # perturbation tensor across minibatches.  NOTE(review): the 3x32x32 shape
    # assumes CIFAR-style inputs — confirm for other datasets.
    if args.attack == 'free':
        delta = torch.zeros(args.batch_size, 3, 32, 32).cuda()
        delta.requires_grad = True
    elif args.attack == 'fgsm' and args.fgsm_init == 'previous':
        delta = torch.zeros(args.batch_size, 3, 32, 32).cuda()
        delta.requires_grad = True
    if args.attack == 'free':
        # 'free' replays each minibatch attack_iters times, so divide the
        # epoch budget accordingly.
        assert args.epochs % args.attack_iters == 0
        epochs = int(math.ceil(args.epochs / args.attack_iters))
    else:
        epochs = args.epochs
    if args.lr_schedule == 'cyclic':
        # One-cycle schedule: ramp 0 -> lr_max over the first 40% of epochs,
        # then back down to 0.
        lr_schedule = lambda t: np.interp([t], [0, args.epochs * 2 // 5, args.epochs], [0, args.lr_max, 0])[0]
    elif args.lr_schedule == 'piecewise':
        def lr_schedule(t):
            if t / args.epochs < 0.5:
                return args.lr_max
            elif t / args.epochs < 0.75:
                return args.lr_max / 10.
            else:
                return args.lr_max / 100.
    prev_robust_acc = 0.
    # BUG FIX: ``best_state_dict`` was only ever assigned inside the
    # overfit-check branch, so the final torch.save raised a NameError whenever
    # args.overfit_check was off (or the loop broke before any assignment).
    best_state_dict = copy.deepcopy(model.state_dict())
    logger.info('Epoch \t Time \t LR \t \t Train Loss \t Train Acc')
    for epoch in range(epochs):
        start_time = time.time()
        train_loss = 0
        train_acc = 0
        train_n = 0
        for i, (X, y) in enumerate(train_loader):
            X = X.cuda().float()
            y = y.cuda().long()
            if i == 0:
                # Fixed minibatch reused for the per-epoch robustness probe.
                first_batch = X, y
            lr = lr_schedule(epoch + (i + 1) / len(train_loader))
            opt.param_groups[0].update(lr=lr)
            if args.attack == 'pgd':
                delta = attack_pgd(model, X, y, epsilon, pgd_alpha, args.attack_iters, args.restarts, opt)
            elif args.attack == 'fgsm':
                if args.fgsm_init == 'zero':
                    delta = torch.zeros_like(X, requires_grad=True)
                    delta.requires_grad = True
                elif args.fgsm_init == 'random':
                    # Uniform init within the per-channel epsilon ball.
                    delta = torch.zeros_like(X).cuda()
                    delta[:, 0, :, :].uniform_(-epsilon[0][0][0].item(), epsilon[0][0][0].item())
                    delta[:, 1, :, :].uniform_(-epsilon[1][0][0].item(), epsilon[1][0][0].item())
                    delta[:, 2, :, :].uniform_(-epsilon[2][0][0].item(), epsilon[2][0][0].item())
                    delta.requires_grad = True
                elif args.fgsm_init == 'previous':
                    delta.requires_grad = True
                output = model(X + delta[:X.size(0)])
                loss = F.cross_entropy(output, y)
                with amp.scale_loss(loss, opt) as scaled_loss:
                    scaled_loss.backward()
                grad = delta.grad.detach()
                delta.data = clamp(delta + args.fgsm_alpha * epsilon * torch.sign(grad), -epsilon, epsilon)
                delta = delta.detach()
            elif args.attack == 'free':
                delta.requires_grad = True
                for j in range(args.attack_iters):
                    # LR is annealed per inner replay step for 'free' training.
                    epoch_iters = epoch * args.attack_iters + (i * args.attack_iters + j + 1) / len(train_loader)
                    lr = lr_schedule(epoch_iters)
                    opt.param_groups[0].update(lr=lr)
                    output = model(clamp(X + delta[:X.size(0)], lower_limit, upper_limit))
                    loss = F.cross_entropy(output, y)
                    opt.zero_grad()
                    with amp.scale_loss(loss, opt) as scaled_loss:
                        scaled_loss.backward()
                    grad = delta.grad.detach()
                    delta.data = clamp(delta + epsilon * torch.sign(grad), -epsilon, epsilon)
                    nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                    opt.step()
                    delta.grad.zero_()
            elif args.attack == 'none':
                delta = torch.zeros_like(X)
            output = model(clamp(X + delta[:X.size(0)], lower_limit, upper_limit))
            loss = criterion(output, y)
            # 'free' already stepped the optimizer inside its inner loop.
            if args.attack != 'free':
                opt.zero_grad()
                with amp.scale_loss(loss, opt) as scaled_loss:
                    scaled_loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                opt.step()
            train_loss += loss.item() * y.size(0)
            train_acc += (output.max(1)[1] == y).sum().item()
            train_n += y.size(0)
        if args.overfit_check:
            # Check current PGD robustness of model using a fixed minibatch.
            # BUG FIX: ``first_batch`` is a (X, y) tuple, but the original
            # indexed it like a dict (first_batch['input']) — TypeError.
            X, y = first_batch
            pgd_delta = attack_pgd(model, X, y, epsilon, pgd_alpha, args.attack_iters, args.restarts, opt)
            with torch.no_grad():
                output = model(clamp(X + pgd_delta[:X.size(0)], lower_limit, upper_limit))
            robust_acc = (output.max(1)[1] == y).sum().item() / y.size(0)
            # Stop early if robustness collapses (catastrophic overfitting).
            if robust_acc - prev_robust_acc < -0.5:
                break
            prev_robust_acc = robust_acc
            best_state_dict = copy.deepcopy(model.state_dict())
        train_time = time.time()
        logger.info('%d \t %.1f \t %.4f \t %.4f \t %.4f', epoch, train_time - start_time,
                    lr, train_loss / train_n, train_acc / train_n)
    torch.save(best_state_dict, model_path)
    logger.info('Total time: %.4f', train_time - start_start_time)