def main():
    """Adversarially train the network for exactly ``config.num_epochs`` epochs.

    Builds the network, loss, optimizer, LR scheduler, datasets and attack
    methods from the module-level ``config``/``args``, optionally resumes from
    the last checkpoint, then runs the train/eval/checkpoint loop.

    Side effects: writes ``epoch-{n}.checkpoint`` files into ``config.model_dir``.
    """
    device = torch.device('cuda:{}'.format(args.d))
    # Fixed input sizes are assumed; let cuDNN pick the fastest kernels.
    torch.backends.cudnn.benchmark = True

    net = create_network()
    net.to(device)
    criterion = config.create_loss_function().to(device)
    optimizer = config.create_optimizer(net.parameters())
    lr_scheduler = config.create_lr_scheduler(optimizer)

    ds_train = create_train_dataset(args.batch_size)
    ds_val = create_test_dataset(args.batch_size)

    train_attack = config.create_attack_method(device)
    eval_attack = config.create_evaluation_attack_method(device)

    # Resume: --auto_continue points args.resume at the rolling 'last.checkpoint'.
    now_epoch = 0
    if args.auto_continue:
        args.resume = os.path.join(config.model_dir, 'last.checkpoint')
    if args.resume is not None and os.path.isfile(args.resume):
        now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

    # BUG FIX: the original `while True: if now_epoch > config.num_epochs: break`
    # broke one iteration too late — the check still passed at
    # now_epoch == num_epochs, the counter was then incremented, and an extra
    # epoch labelled num_epochs+1 was trained. `<` runs exactly num_epochs epochs.
    while now_epoch < config.num_epochs:
        now_epoch = now_epoch + 1
        # get_last_lr() replaces the deprecated get_lr(), matching the other
        # main() variant in this file; get_lr() is only meant to be called
        # from inside the scheduler's step().
        descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
            now_epoch, config.num_epochs, lr_scheduler.get_last_lr()[0])
        train_one_epoch(net, ds_train, optimizer, criterion, device,
                        descrip_str, train_attack, adv_coef=args.adv_coef)
        if config.eval_interval > 0 and now_epoch % config.eval_interval == 0:
            eval_one_epoch(net, ds_val, device, eval_attack)
        lr_scheduler.step()
        save_checkpoint(now_epoch, net, optimizer, lr_scheduler,
                        file_name=os.path.join(
                            config.model_dir,
                            'epoch-{}.checkpoint'.format(now_epoch)))
def main():
    """Script entry point: seed RNGs, build the data/model stack, then either
    train (checkpointing periodically) or evaluate saved checkpoints,
    depending on ``args.mode`` ('train', 'test', or 'testtrain')."""
    args = parse_args()

    # Seed both RNG sources so runs are reproducible.
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    logging.info('Reading dataset metadata')
    train_loader, val_loader = get_data_loaders(args)
    # The loss needs per-class weights computed from the training split.
    args.classification_weights = train_loader.dataset.classification_weights

    logging.info('Constructing model')
    model, loss, restarting_epoch = get_model_and_loss(args)

    if args.mode == 'train':
        # DataParallel models get a plain Adam; otherwise the model supplies
        # its own optimizer.
        if args.data_parallel:
            optimizer = torch.optim.Adam(model.parameters(), lr=args.base_lr)
        else:
            optimizer = model.optimizer()

        for epoch_idx in range(restarting_epoch, args.epochs):
            train_one_epoch(model, loss, optimizer, train_loader, epoch_idx + 1, args)
            if epoch_idx % args.save_frequency == 0:
                save_path = os.path.join(
                    args.save, 'model_state_{:02d}.pytar'.format(epoch_idx + 1))
                torch.save(model.state_dict(), save_path)

    elif args.mode in ('test', 'testtrain'):
        # 'testtrain' evaluates on the training split instead of validation.
        if args.mode == 'testtrain':
            val_loader = train_loader

        if args.reload_dir is not None:
            # Evaluate every saved checkpoint in the directory, in epoch order.
            checkpoint_names = [
                f for f in os.listdir(args.reload_dir) if f.endswith('.pytar')
            ]
            epoch_numbers = sorted(
                int(f.split('_')[-1].replace('.pytar', ''))
                for f in checkpoint_names)
            for epoch in epoch_numbers:
                args.reload = os.path.join(
                    args.reload_dir, 'model_state_{:02d}.pytar'.format(epoch))
                model, loss, restarting_epoch = get_model_and_loss(args)
                test_one_epoch(model, loss, val_loader, epoch, args)
        else:
            # No directory given: evaluate the already-constructed model once.
            test_one_epoch(model, loss, val_loader, 0, args)

    else:
        raise NotImplementedError("Unsupported mode {}".format(args.mode))
def main():
    """Adversarial-training driver.

    Assembles network, loss, optimizer, scheduler, datasets and attack
    methods from the module-level ``config``/``args``, resumes from a
    checkpoint when available, and trains for ``config.num_epochs`` epochs,
    evaluating every ``config.eval_interval`` epochs and checkpointing after
    each one into ``config.model_dir``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    torch.backends.cudnn.benchmark = True

    net = create_network()
    net.to(device)
    criterion = config.create_loss_function().to(device)
    optimizer = config.create_optimizer(net.parameters())
    lr_scheduler = config.create_lr_scheduler(optimizer)

    ds_train = create_train_dataset(args.batch_size)
    ds_val = create_test_dataset(args.batch_size)

    train_attack = config.create_attack_method(device)
    eval_attack = config.create_evaluation_attack_method(device)

    # Determine the epoch to start from: --auto_continue redirects args.resume
    # to the rolling 'last.checkpoint' before the existence check.
    start_epoch = 0
    if args.auto_continue:
        args.resume = os.path.join(config.model_dir, 'last.checkpoint')
    if args.resume is not None and os.path.isfile(args.resume):
        start_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

    for epoch in range(start_epoch, config.num_epochs):
        descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
            epoch, config.num_epochs, lr_scheduler.get_last_lr()[0])
        train_one_epoch(net, ds_train, optimizer, criterion, device,
                        descrip_str, train_attack, adv_coef=args.adv_coef)

        if config.eval_interval > 0 and epoch % config.eval_interval == 0:
            eval_one_epoch(net, ds_val, device, eval_attack)

        lr_scheduler.step()
        save_checkpoint(epoch, net, optimizer, lr_scheduler,
                        file_name=os.path.join(
                            config.model_dir,
                            'epoch-{}.checkpoint'.format(epoch)))
# NOTE(review): this is an interior fragment of a training routine — the
# enclosing def and the definitions of DEVICE, net, optimizer, criterion and
# lr_scheduler are outside this chunk. Formatting reconstructed; code unchanged.

# Datasets and attack methods (train-time and eval-time adversaries).
ds_train = create_train_dataset(args.batch_size)
ds_val = create_test_dataset(args.batch_size)
TrainAttack = config.create_attack_method(DEVICE)
EvalAttack = config.create_evaluation_attack_method(DEVICE)

# Resume support: --auto_continue points args.resume at the rolling checkpoint.
now_epoch = 0
if args.auto_continue:
    args.resume = os.path.join(config.model_dir, 'last.checkpoint')
if args.resume is not None and os.path.isfile(args.resume):
    now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler)

# NOTE(review): the break condition `now_epoch > config.num_epochs` still
# passes at now_epoch == num_epochs and the counter increments afterwards, so
# this appears to train num_epochs + 1 epochs — confirm intent (`>=` would run
# exactly num_epochs). Also, get_lr() is deprecated in recent PyTorch in favor
# of get_last_lr() — the for-loop variant of main() in this file already uses it.
while True:
    if now_epoch > config.num_epochs:
        break
    now_epoch = now_epoch + 1
    descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0])
    train_one_epoch(net, ds_train, optimizer, criterion, DEVICE, descrip_str, TrainAttack, adv_coef = args.adv_coef)
    # Periodic adversarial evaluation on the held-out split.
    if config.val_interval > 0 and now_epoch % config.val_interval == 0:
        eval_one_epoch(net, ds_val, DEVICE, EvalAttack)
    lr_scheduler.step()
    # One checkpoint file per epoch under config.model_dir.
    save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch)))
# NOTE(review): interior fragment of a training routine — the enclosing def
# and the definitions of net/optimizer/criterion/DEVICE/TrainAttack/EvalAttack
# are outside this chunk. Formatting reconstructed; code unchanged.

# Resume path defaults to the rolling 'last.checkpoint' (set by the caller's
# auto-continue branch, presumably — verify against the enclosing function).
args.resume = os.path.join(config.model_dir, 'last.checkpoint')
if args.resume is not None and os.path.isfile(args.resume):
    now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

# NOTE(review): same off-by-one pattern as the sibling loop — the check passes
# at now_epoch == num_epochs and the counter increments afterwards, so this
# appears to run num_epochs + 1 epochs; confirm intent. get_lr() is deprecated
# in recent PyTorch (get_last_lr() is the replacement used elsewhere in this file).
while True:
    if now_epoch > config.num_epochs:
        break
    now_epoch = now_epoch + 1
    descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
        now_epoch, config.num_epochs, lr_scheduler.get_lr()[0])
    # This variant throttles the adversary: attack_freq comes from
    # args.attack_interval instead of an adv_coef weighting.
    train_one_epoch(net, ds_train, optimizer, criterion, DEVICE, descrip_str,
                    TrainAttack, attack_freq=args.attack_interval)
    if config.val_interval > 0 and now_epoch % config.val_interval == 0:
        eval_one_epoch(net, ds_val, DEVICE, EvalAttack)
    lr_scheduler.step()
    save_checkpoint(now_epoch, net, optimizer, lr_scheduler,
                    file_name=os.path.join(
                        config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch)))
# NOTE(review): interior fragment of a training routine (enclosing def not
# visible in this chunk), and the final save_checkpoint(...) call is cut off
# mid-argument-list by the extraction. Formatting reconstructed; code unchanged.

args.resume = os.path.join(config.model_dir, 'last.checkpoint')
if args.resume is not None and os.path.isfile(args.resume):
    now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

# NOTE(review): this variant calls train_one_epoch/eval_one_epoch WITHOUT an
# attack method — plain (non-adversarial) training, unlike the sibling loops.
# The `> num_epochs` break condition looks like it runs one extra epoch, and
# get_lr() is deprecated in recent PyTorch — same concerns as the siblings.
while True:
    if now_epoch > config.num_epochs:
        break
    now_epoch = now_epoch + 1
    descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
        now_epoch, config.num_epochs, lr_scheduler.get_lr()[0])
    train_one_epoch(
        net,
        ds_train,
        optimizer,
        criterion,
        DEVICE,
        descrip_str,
    )
    if config.val_interval > 0 and now_epoch % config.val_interval == 0:
        eval_one_epoch(
            net,
            ds_val,
            DEVICE,
        )
    lr_scheduler.step()
    # NOTE(review): source truncated here — the remainder of this
    # save_checkpoint(...) call is not visible in this chunk.
    save_checkpoint(now_epoch, net, optimizer,
# NOTE(review): interior fragment — net, optimizer, loss_function,
# cifar10_training_loader, train(), train_scheduler and settings are defined
# outside this chunk, and the final 'pgd10' branch is truncated mid-call.
# Formatting reconstructed; code unchanged.

# Epochs are 1-based; after the warm-up phase the LR schedule takes over.
for epoch in range(1, settings.EPOCH2):
    if epoch > args.warm:
        train_scheduler.step(epoch)
    # print('Epoch {}'.format(epoch))
    # train(epoch)

    # Warm-up epochs use the plain train() routine; afterwards the branch on
    # args.train picks which attack generates the adversarial training batches.
    if args.warm > 0 and epoch <= args.warm:
        train(epoch)
    elif args.train == 'pgd5':
        # NOTE(review): the 'pgd5' branch builds its attack from
        # create_evaluation_attack_method4 while 'pgd3' uses
        # create_attack_method — confirm this cross-wiring is intentional.
        trainattack = config.create_evaluation_attack_method4(
            torch.device('cuda:{}'.format(0)))
        train_one_epoch(net=net,
                        batch_generator=cifar10_training_loader,
                        optimizer=optimizer,
                        criterion=loss_function,
                        DEVICE=torch.device('cuda:{}'.format(0)),
                        descrip_str=str(epoch) + 'Training',
                        AttackMethod=trainattack)
    elif args.train == 'pgd3':
        trainattack = config.create_attack_method(
            torch.device('cuda:{}'.format(0)))
        train_one_epoch(net=net,
                        batch_generator=cifar10_training_loader,
                        optimizer=optimizer,
                        criterion=loss_function,
                        DEVICE=torch.device('cuda:{}'.format(0)),
                        descrip_str=str(epoch) + 'Training',
                        AttackMethod=trainattack)
    elif args.train == 'pgd10':
        # NOTE(review): source truncated here — the rest of the
        # create_attack_method10(...) call and the branch body are not visible.
        trainattack = config.create_attack_method10(