Example #1
def main():
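    # Bind computation to the GPU selected by args.d and let cuDNN benchmark
    # convolution algorithms (a win when input shapes stay fixed).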
    DEVICE = torch.device('cuda:{}'.format(args.d))
    torch.backends.cudnn.benchmark = True

    net = create_network()
    net.to(DEVICE)
    criterion = config.create_loss_function().to(DEVICE)

    optimizer = config.create_optimizer(net.parameters())
    lr_scheduler = config.create_lr_scheduler(optimizer)

    ds_train = create_train_dataset(args.batch_size)
    ds_val = create_test_dataset(args.batch_size)

    TrainAttack = config.create_attack_method(DEVICE)
    EvalAttack = config.create_evaluation_attack_method(DEVICE)

    now_epoch = 0

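    # With auto_continue set, resume from the latest checkpoint if it exists.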
    if args.auto_continue:
        args.resume = os.path.join(config.model_dir, 'last.checkpoint')
    if args.resume is not None and os.path.isfile(args.resume):
        now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

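    # Adversarial training loop: one epoch per iteration, with periodic
    # robustness evaluation and a checkpoint after every epoch.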
    while True:
        if now_epoch >= config.num_epochs:
            break
        now_epoch += 1

        descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
            now_epoch, config.num_epochs,
            lr_scheduler.get_last_lr()[0])
        train_one_epoch(net,
                        ds_train,
                        optimizer,
                        criterion,
                        DEVICE,
                        descrip_str,
                        TrainAttack,
                        adv_coef=args.adv_coef)
        if config.eval_interval > 0 and now_epoch % config.eval_interval == 0:
            eval_one_epoch(net, ds_val, DEVICE, EvalAttack)

        lr_scheduler.step()

        save_checkpoint(now_epoch,
                        net,
                        optimizer,
                        lr_scheduler,
                        file_name=os.path.join(
                            config.model_dir,
                            'epoch-{}.checkpoint'.format(now_epoch)))
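
All of the examples call save_checkpoint and load_checkpoint without defining them. A minimal sketch consistent with the call sites above, assuming the checkpoint is a plain dict of state dicts (the key names are assumptions):

import torch

def save_checkpoint(epoch, net, optimizer, lr_scheduler, file_name):
    # Persist the epoch counter plus the state dicts of the model,
    # optimizer, and LR scheduler so training can resume exactly.
    torch.save({
        'epoch': epoch,
        'net': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'lr_scheduler': lr_scheduler.state_dict(),
    }, file_name)

def load_checkpoint(file_name, net, optimizer, lr_scheduler):
    # Restore the saved state in place and return the last completed epoch.
    checkpoint = torch.load(file_name, map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
    return checkpoint['epoch']
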
Example #2
def main():
    args = parse_args()

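    # Seed Python's and PyTorch's RNGs for reproducible runs.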
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    logging.info('Reading dataset metadata')
    train_loader, val_loader = get_data_loaders(args)
    args.classification_weights = train_loader.dataset.classification_weights

    logging.info('Constructing model')
    model, loss, restarting_epoch = get_model_and_loss(args)

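    # Training mode: choose the optimizer, then train and snapshot the
    # weights every save_frequency epochs.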
    if args.mode == 'train':
        if not args.data_parallel:
            optimizer = model.optimizer()
        else:
            optimizer = torch.optim.Adam(model.parameters(), lr=args.base_lr)

        for i in range(restarting_epoch, args.epochs):
            train_one_epoch(model, loss, optimizer, train_loader, i + 1, args)
            if i % args.save_frequency == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(args.save,
                                 'model_state_{:02d}.pytar'.format(i + 1)))

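    # Test mode: evaluate a single reloaded model, or every saved .pytar
    # snapshot in reload_dir in epoch order; 'testtrain' evaluates on the
    # training set instead.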
    elif args.mode in ('test', 'testtrain'):
        if args.mode == 'testtrain':
            val_loader = train_loader
        if args.reload_dir is not None:
            all_saved_models = [
                f for f in os.listdir(args.reload_dir) if f.endswith('.pytar')
            ]
            all_indices = [
                f.split('_')[-1].replace('.pytar', '')
                for f in all_saved_models
            ]
            int_indices = [int(f) for f in all_indices]
            int_indices.sort()
            for epoch in int_indices:
                args.reload = os.path.join(
                    args.reload_dir, 'model_state_{:02d}.pytar'.format(epoch))
                model, loss, restarting_epoch = get_model_and_loss(args)
                test_one_epoch(model, loss, val_loader, epoch, args)
        else:
            test_one_epoch(model, loss, val_loader, 0, args)
    else:
        raise NotImplementedError("Unsupported mode {}".format(args.mode))
Example #3
def main():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    torch.backends.cudnn.benchmark = True

    net = create_network()
    net.to(device)
    criterion = config.create_loss_function().to(device)

    optimizer = config.create_optimizer(net.parameters())
    lr_scheduler = config.create_lr_scheduler(optimizer)

    ds_train = create_train_dataset(args.batch_size)
    ds_val = create_test_dataset(args.batch_size)

    train_attack = config.create_attack_method(device)
    eval_attack = config.create_evaluation_attack_method(device)

    now_epoch = 0

    if args.auto_continue:
        args.resume = os.path.join(config.model_dir, 'last.checkpoint')
    if args.resume is not None and os.path.isfile(args.resume):
        now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

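    # The for-loop makes the epoch bookkeeping explicit: training resumes
    # at the epoch index returned by load_checkpoint.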
    for i in range(now_epoch, config.num_epochs):

        descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(i + 1, config.num_epochs,
                                                             lr_scheduler.get_last_lr()[0])
        train_one_epoch(net, ds_train, optimizer, criterion, device,
                        descrip_str, train_attack, adv_coef=args.adv_coef)
        if config.eval_interval > 0 and (i + 1) % config.eval_interval == 0:
            eval_one_epoch(net, ds_val, device, eval_attack)

        lr_scheduler.step()

        save_checkpoint(i + 1, net, optimizer, lr_scheduler,
                        file_name=os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(i + 1)))
Example #4
ds_train = create_train_dataset(args.batch_size)
ds_val = create_test_dataset(args.batch_size)

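# Distinct attack methods: one to craft adversarial examples during
# training, one for robustness evaluation.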
TrainAttack = config.create_attack_method(DEVICE)
EvalAttack = config.create_evaluation_attack_method(DEVICE)

now_epoch = 0

if args.auto_continue:
    args.resume = os.path.join(config.model_dir, 'last.checkpoint')
if args.resume is not None and os.path.isfile(args.resume):
    now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

while True:
    if now_epoch >= config.num_epochs:
        break
    now_epoch += 1

    descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs,
                                                         lr_scheduler.get_last_lr()[0])
    train_one_epoch(net, ds_train, optimizer, criterion, DEVICE,
                    descrip_str, TrainAttack, adv_coef=args.adv_coef)
    if config.val_interval > 0 and now_epoch % config.val_interval == 0:
        eval_one_epoch(net, ds_val, DEVICE, EvalAttack)

    lr_scheduler.step()

    save_checkpoint(now_epoch, net, optimizer, lr_scheduler,
                    file_name=os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch)))
Example #5
if args.auto_continue:
    args.resume = os.path.join(config.model_dir, 'last.checkpoint')
if args.resume is not None and os.path.isfile(args.resume):
    now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

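# Variant of the loop above: the attack is scheduled via attack_freq
# (args.attack_interval) instead of being weighted by adv_coef.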
while True:
    if now_epoch >= config.num_epochs:
        break
    now_epoch += 1

    descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
        now_epoch, config.num_epochs,
        lr_scheduler.get_last_lr()[0])
    train_one_epoch(net,
                    ds_train,
                    optimizer,
                    criterion,
                    DEVICE,
                    descrip_str,
                    TrainAttack,
                    attack_freq=args.attack_interval)
    if config.val_interval > 0 and now_epoch % config.val_interval == 0:
        eval_one_epoch(net, ds_val, DEVICE, EvalAttack)

    lr_scheduler.step()

    save_checkpoint(now_epoch,
                    net,
                    optimizer,
                    lr_scheduler,
                    file_name=os.path.join(
                        config.model_dir,
                        'epoch-{}.checkpoint'.format(now_epoch)))
Example #6
if args.auto_continue:
    args.resume = os.path.join(config.model_dir, 'last.checkpoint')
if args.resume is not None and os.path.isfile(args.resume):
    now_epoch = load_checkpoint(args.resume, net, optimizer, lr_scheduler)

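# Variant without any attack method: plain training and evaluation.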
while True:
    if now_epoch >= config.num_epochs:
        break
    now_epoch += 1

    descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(
        now_epoch, config.num_epochs,
        lr_scheduler.get_last_lr()[0])
    train_one_epoch(
        net,
        ds_train,
        optimizer,
        criterion,
        DEVICE,
        descrip_str,
    )
    if config.val_interval > 0 and now_epoch % config.val_interval == 0:
        eval_one_epoch(
            net,
            ds_val,
            DEVICE,
        )

    lr_scheduler.step()

    save_checkpoint(now_epoch,
                    net,
                    optimizer,
                    lr_scheduler,
                    file_name=os.path.join(
                        config.model_dir,
                        'epoch-{}.checkpoint'.format(now_epoch)))
Example #7
    for epoch in range(1, settings.EPOCH2):
        if epoch > args.warm:
            train_scheduler.step(epoch)
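
        # Warm-up epochs run standard training; afterwards the PGD variant
        # selected by args.train supplies the adversarial training examples.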
        if args.warm > 0 and epoch <= args.warm:
            train(epoch)
        elif args.train == 'pgd5':
            trainattack = config.create_evaluation_attack_method4(
                torch.device('cuda:0'))
            train_one_epoch(net=net,
                            batch_generator=cifar10_training_loader,
                            optimizer=optimizer,
                            criterion=loss_function,
                            DEVICE=torch.device('cuda:0'),
                            descrip_str='Training epoch {}'.format(epoch),
                            AttackMethod=trainattack)
        elif args.train == 'pgd3':
            trainattack = config.create_attack_method(
                torch.device('cuda:0'))
            train_one_epoch(net=net,
                            batch_generator=cifar10_training_loader,
                            optimizer=optimizer,
                            criterion=loss_function,
                            DEVICE=torch.device('cuda:0'),
                            descrip_str='Training epoch {}'.format(epoch),
                            AttackMethod=trainattack)
        elif args.train == 'pgd10':
            trainattack = config.create_attack_method10(
                torch.device('cuda:0'))
            train_one_epoch(net=net,
                            batch_generator=cifar10_training_loader,
                            optimizer=optimizer,
                            criterion=loss_function,
                            DEVICE=torch.device('cuda:0'),
                            descrip_str='Training epoch {}'.format(epoch),
                            AttackMethod=trainattack)