Example No. 1
def job(tuning, params_path, devices, resume, save_interval):
    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    # change the seed whenever the tuning parameters change (hoping for a seed-averaging effect)
    seed = sum(ord(_) for _ in str(params.values()))
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
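    # benchmark=False keeps cuDNN from autotuning kernels, so runs with the same seed stay reproducible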
    torch.backends.cudnn.benchmark = False

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )

    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['data'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='S',
        test_size=0,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    optimizer = utils.get_optim(params, model)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=3e-6)
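    # T_max counts batches, since scheduler.step() is called once per iteration in the loop below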
    start_epoch = 0

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, params['epochs']):

        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}'
        )

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None,
                                 ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')
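
            # margin-based heads such as ArcFace need the target label in the forward pass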

            outputs = model(x, y)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}'
                )

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == params['epochs'] or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    if isinstance(model, nn.DataParallel):
        model = model.module
    datasets = ('roxford5k', 'rparis6k')
    results = eval_datasets(model,
                            datasets=datasets,
                            ms=False,
                            tta_gem_p=1.0,
                            logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            for key in ['mapE', 'mapM', 'mapH']:
                mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example No. 2
def train():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    sample_dir = 'samples'
    os.makedirs(sample_dir, exist_ok=True)
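
    # Normalize(mean=0.5, std=0.5) rescales MNIST pixels from [0, 1] to [-1, 1], the usual range for a tanh-output generator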

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean=[0.5], std=[0.5])])

    mnist = torchvision.datasets.MNIST(root='data',
                                       train=True,
                                       transform=transform,
                                       download=True)

    data_loader = torch.utils.data.DataLoader(dataset=mnist,
                                              batch_size=params['batch_size'],
                                              shuffle=True)

    D = models.Discriminator(params['image_size'], params['hidden_size'])
    G = models.Generator(params['image_size'], params['latent_size'],
                         params['hidden_size'])

    D = D.to(device)
    G = G.to(device)

    criterion = nn.BCELoss()
    d_optimizer = utils.get_optim(params, D)
    g_optimizer = utils.get_optim(params, G)

    d_losses = []
    g_losses = []
    total_step = len(data_loader)
    for epoch in range(params['epochs']):
        for i, (images, _) in enumerate(data_loader):  # labels are not used
            # (batch_size, 1, 28, 28) -> (batch_size, 1*28*28)
            b_size = images.size(0)
            images = images.reshape(b_size, -1).to(device)

            real_labels = torch.ones(b_size, 1).to(device)
            fake_labels = torch.zeros(b_size, 1).to(device)

            # Train discriminator
            outputs = D(images)
            d_loss_real = criterion(outputs, real_labels)
            real_score = outputs

            z = torch.randn(b_size, params['latent_size']).to(device)
            fake_images = G(z)
            # detach so the discriminator update does not backpropagate into the generator
            outputs = D(fake_images.detach())
            d_loss_fake = criterion(outputs, fake_labels)
            fake_score = outputs

            d_loss = d_loss_real + d_loss_fake
            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()

            # Train generator
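            # (non-saturating loss: G is trained so that D classifies its fake images as real)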
            fake_images = G(z)
            outputs = D(fake_images)

            g_loss = criterion(outputs, real_labels)

            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

            print(
                'Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}'
                .format(
                    epoch, params['epochs'], i + 1, total_step, d_loss.item(),
                    g_loss.item(),
                    real_score.mean().item(),
                    fake_score.mean().item()))  # .item(): pull the Python number out of a zero-dim tensor

            d_losses.append(d_loss.item())
            g_losses.append(g_loss.item())

        if (epoch + 1) == 1:
            images = images.reshape(b_size, 1, 28, 28)
            save_image(utils.denorm(images),
                       os.path.join(sample_dir, 'real_images.png'))
        fake_images = fake_images.reshape(b_size, 1, 28, 28)
        save_image(
            utils.denorm(fake_images),
            os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch + 1)))

    os.makedirs('weights', exist_ok=True)
    torch.save(G.state_dict(), 'weights/G.ckpt')
    torch.save(D.state_dict(), 'weights/D.ckpt')

    plt.figure(figsize=(10, 5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(g_losses, label="Generator")
    plt.plot(d_losses, label="Discriminator")
    plt.xlabel("iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(os.path.join(sample_dir, 'loss.png'))
Example No. 3
    model = models.LandmarkNet(n_classes=params['class_topk'],
                               model_name=params['model_name'],
                               pooling=params['pooling'],
                               loss_module=params['loss'],
                               s=params['s'],
                               margin=params['margin'],
                               theta_zero=params['theta_zero'],
                               use_fc=params['use_fc'],
                               fc_dim=params['fc_dim'],
                               )
    num_gpu = len(devices.split(','))
    if num_gpu > 0:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = utils.get_optim(params, model)

    if resume:
        #sdict = torch.load(ROOT + params['base_ckpt_path'])['state_dict']
        #del sdict['final.weight']  # remove fully-connected layer
        #model.load_state_dict(sdict, strict=False)
        rets = load_checkpoint(resume, model=model, optimizer=None, params=False, epoch=True)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=params['epochs'] * len(data_loaders['train']), eta_min=3e-6)
    start_epoch, end_epoch = (0, params['epochs'] - params['scaleup_epochs'])
    if resume:
        start_epoch = rets['epoch']

    for _ in range(start_epoch*len(data_loaders['train'])):
Example No. 4
def train_mlt_single(args):
    global logger
    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    if args.debug:
        train_data = train_data[:200]
        dev_data = dev_data[:200]
        test_data = test_data[:200]
        args.epochs = 3
        args.pruning_iter = 3

    summary_writer = SummaryWriter(
        log_dir=os.path.join(args.tb_path, "global/%s" % task_name)
    )

    logger.info("task name: {}, task id: {}".format(task_db.task_name, task_db.task_id))
    logger.info(
        "train len {}, dev len {}, test len {}".format(
            len(train_data), len(dev_data), len(test_data)
        )
    )

    # init model
    model = get_model(args, task_lst, vocabs)

    logger.info("model: \n{}".format(model))
    if args.init_weights is not None:
        utils.load_model(model, args.init_weights)

    if utils.need_acc(task_name):
        metrics = [AccuracyMetric(target="y"), MetricInForward(val_name="loss")]
        metric_key = "acc"

    else:
        metrics = [
            YangJieSpanMetric(
                tag_vocab=vocabs[task_name],
                pred="pred",
                target="y",
                seq_len="seq_len",
                encoding_type="bioes" if task_name == "ner" else "bio",
            ),
            MetricInForward(val_name="loss"),
        ]
        metric_key = "f"
    logger.info(metrics)

    need_cut_names = list(set([s.strip() for s in args.need_cut.split(",")]))
    prune_names = []
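    # collect the names of weight tensors matched by args.need_cut; biases and frozen parameters are skipped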
    for name, p in model.named_parameters():
        if not p.requires_grad or "bias" in name:
            continue
        for n in need_cut_names:
            if n in name:
                prune_names.append(name)
                break

    # get Pruning class
    pruner = Pruning(
        model, prune_names, final_rate=args.final_rate, pruning_iter=args.pruning_iter
    )
    if args.init_masks is not None:
        pruner.load(args.init_masks)
        pruner.apply_mask(pruner.remain_mask, pruner._model)
    # save checkpoint
    os.makedirs(args.save_path, exist_ok=True)

    logger.info('Saving init-weights to {}'.format(args.save_path))
    torch.save(
        model.cpu().state_dict(), os.path.join(args.save_path, "init_weights.th")
    )
    torch.save(args, os.path.join(args.save_path, "args.th"))
    # start training and pruning
    summary_writer.add_scalar("remain_rate", 100.0, 0)
    summary_writer.add_scalar("cutoff", 0.0, 0)

    if args.init_weights is not None:
        init_tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = init_tester.test()
        logger.info("No init testing, Result: {}".format(res))
        del res, init_tester

    for prune_step in range(pruner.pruning_iter + 1):
        # reset optimizer every time
        optim_params = [p for p in model.parameters() if p.requires_grad]
        # utils.get_logger(__name__).debug(optim_params)
        utils.get_logger(__name__).debug(len(optim_params))
        optimizer = get_optim(args.optim, optim_params)
        # optimizer = TriOptim(optimizer, args.n_filters, args.warmup, args.decay)
        factor = pruner.cur_rate / 100.0
        factor = 1.0
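        # note: factor is reset to 1.0, so the learning rate is not actually scaled by the remaining-weight rate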
        # print(factor, pruner.cur_rate)
        for pg in optimizer.param_groups:
            pg["lr"] = factor * pg["lr"]
        utils.get_logger(__name__).info(optimizer)

        trainer = Trainer(
            train_data,
            model,
            loss=LossInForward(),
            optimizer=optimizer,
            metric_key=metric_key,
            metrics=metrics,
            print_every=200,
            batch_size=args.batch_size,
            num_workers=4,
            n_epochs=args.epochs,
            dev_data=dev_data,
            save_path=None,
            sampler=fastNLP.BucketSampler(batch_size=args.batch_size),
            callbacks=[
                pruner,
                # LRStep(lstm.WarmupLinearSchedule(optimizer, args.warmup, int(len(train_data)/args.batch_size*args.epochs)))
                GradientClipCallback(clip_type="norm", clip_value=5),
                LRScheduler(
                    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep))
                ),
                LogCallback(path=os.path.join(args.tb_path, "No", str(prune_step))),
            ],
            use_tqdm=False,
            device="cuda",
            check_code_level=-1,
        )
        res = trainer.train()
        logger.info("No #{} training, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("prunning_dev_acc", val, prune_step)
        tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = tester.test()
        logger.info("No #{} testing, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("pruning_test_acc", val, prune_step)

        # prune and save
        torch.save(
            model.state_dict(),
            os.path.join(
                args.save_path,
                "best_{}_{}.th".format(pruner.prune_times, pruner.cur_rate),
            ),
        )
        pruner.pruning_model()
        summary_writer.add_scalar("remain_rate", pruner.cur_rate, prune_step + 1)
        summary_writer.add_scalar("cutoff", pruner.last_cutoff, prune_step + 1)

        pruner.save(
            os.path.join(
                args.save_path, "{}_{}.th".format(pruner.prune_times, pruner.cur_rate)
            )
        )
Example No. 5
def job(tuning, params_path, devices, resume, save_interval):
    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    if resume is None:
        # keep the paths consistent between the C-AIR and ABCI environments
        params['base_ckpt_path'] = f'experiments/v1only/ep4_augmentation-soft_epochs-5_loss-{params["loss"]}.pth'
        params['clean_path'] = ROOT + f'input/clean/train19_cleaned_verifythresh{params["verifythresh"]}_freqthresh{params["freqthresh"]}.csv'
    else:
        params = utils.load_checkpoint(path=resume, params=True)['params']

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )

    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['data'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='SS2',
        test_size=0,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = utils.get_optim(params, model)

    if resume is None:
        sdict = torch.load(ROOT + params['base_ckpt_path'])['state_dict']
        if params['loss'] == 'adacos':
            del sdict['final.W']  # remove fully-connected layer
        elif params['loss'] == 'softmax':
            del sdict['final.weight'], sdict['final.bias']  # remove fully-connected layer
        else:
            del sdict['final.weight']  # remove fully-connected layer
        model.load_state_dict(sdict, strict=False)

        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=params['epochs'] * len(data_loaders['train']),
            eta_min=3e-6)
        start_epoch, end_epoch = (0,
                                  params['epochs'] - params['scaleup_epochs'])
    else:
        ckpt = utils.load_checkpoint(path=resume,
                                     model=model,
                                     optimizer=optimizer,
                                     epoch=True)
        model, optimizer = ckpt['model'], ckpt['optimizer']
        start_epoch = ckpt['epoch'] + 1
        end_epoch = params['epochs']

        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=params['epochs'] * len(data_loaders['train']),
            eta_min=3e-6,
            last_epoch=start_epoch * len(data_loaders['train']))
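        # last_epoch fast-forwards the cosine schedule to the batch step where the checkpoint stopped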

        setting += 'scaleup_' + resume.split('/')[-1].replace('.pth', '')

        data_loaders = data_utils.make_verified_train_loaders(
            params=params,
            data_root=ROOT + 'input/' + params['data'],
            train_transform=train_transform,
            eval_transform=eval_transform,
            scale='M2',
            test_size=0,
            num_workers=8)
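        # freeze BatchNorm statistics for the scale-up fine-tuning stage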
        batch_norm.freeze_bn(model)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, end_epoch):
        logger.info(f'Epoch {epoch}/{end_epoch}')

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None,
                                 ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}'
                )

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == end_epoch or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    if isinstance(model, nn.DataParallel):
        model = model.module
    datasets = ('oxford5k', 'paris6k', 'roxford5k', 'rparis6k')
    results = eval_datasets(model,
                            datasets=datasets,
                            ms=True,
                            tta_gem_p=1.0,
                            logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            if d in ('oxford5k', 'paris6k'):
                tuning_result[d] = results[d]
            else:
                for key in ['mapE', 'mapM', 'mapH']:
                    mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                    tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example No. 6
def job(tuning, params_path, devices, resume, save_interval):

    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')
    train_transform, eval_transform = build_transforms(
        scale_range=params['scale_range'],
        brightness_range=params['brightness_range'])
    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/train2018',
        train_transform=train_transform,
        eval_transform=eval_transform,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkFishNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling_strings=params['pooling'].split(','),
        loss_module='arcface',
        s=30.0,
        margin=params['margin'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    optimizer = utils.get_optim(params, model)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=1e-6)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)
    if resume is not None:
        model, optimizer = utils.load_checkpoint(path=resume,
                                                 model=model,
                                                 optimizer=optimizer)

    for epoch in range(params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}'
        )

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None,
                                 ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}'
                )

        train_loss = losses.avg
        train_acc = prec1.avg

        # ============================== validation ============================== #
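        # train(False) switches BatchNorm / Dropout into eval mode for the validation pass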
        model.train(False)
        losses.reset()
        prec1.reset()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['val']),
                                 total=len(data_loaders['val']),
                                 miniters=None,
                                 ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            with torch.no_grad():
                outputs = model(x, y)
                loss = criterion(outputs, y)

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

        val_loss = losses.avg
        val_acc = prec1.avg

        logger.info(f'[Val] Loss: \033[1m{val_loss:.4f}\033[0m | '
                    f'Acc: \033[1m{val_acc:.4f}\033[0m\n')

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalars('Loss', {'val': val_loss}, epoch)
        writer.add_scalars('Acc', {'val': val_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if save_interval > 0:
            if (epoch + 1) == params['epochs'] or (epoch + 1) % save_interval == 0:
                output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
                utils.save_checkpoint(path=output_file_name,
                                      model=model,
                                      epoch=epoch,
                                      optimizer=optimizer,
                                      params=params)

    if tuning:
        tuning_result = {}
        for key in ['train_loss', 'train_acc', 'val_loss', 'val_acc']:
            tuning_result[key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example No. 7
def train(dataset):
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  sample_dir = os.path.join('samples', dataset)
  weights_dir = os.path.join('weights', dataset)
  os.makedirs(sample_dir, exist_ok=True)
  os.makedirs(weights_dir, exist_ok=True)

  transforms_ = [
      transforms.Resize(int(params['img_height'] * 1.12), Image.BICUBIC),  # resize the shorter side to this size, keeping the aspect ratio
      transforms.RandomCrop((params['img_height'], params['img_width'])),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
  ]

  # DataLoader
  data_loader = torch.utils.data.DataLoader(
      ImageDataset(os.path.join('data', dataset), transforms_=transforms_, unaligned=True),
      batch_size=params['batch_size'],
      shuffle=True
  )

  val_data_loader = torch.utils.data.DataLoader(
      ImageDataset(os.path.join('data', dataset), transforms_=transforms_, unaligned=True, mode='test'),
      batch_size=5,
      shuffle=True
  )

  # Models
  D_A = models.Discriminator(params['channels'])
  D_B = models.Discriminator(params['channels'])
  G_AB = models.Generator(params['channels'], params['n_residual_blocks'])
  G_BA = models.Generator(params['channels'], params['n_residual_blocks'])

  D_A = D_A.to(device)
  D_B = D_B.to(device)
  G_AB = G_AB.to(device)
  G_BA = G_BA.to(device)

  # initialize models parameters
  D_A.apply(utils.weights_init)
  D_B.apply(utils.weights_init)
  G_AB.apply(utils.weights_init)
  G_BA.apply(utils.weights_init)

  # Losses
  criterion_GAN = nn.MSELoss()
  criterion_cycle = nn.L1Loss()
  criterion_identity = nn.L1Loss()
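  # note: criterion_identity is defined but no identity-mapping term is added to loss_G below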

  # Optimizer
  # the two generators are optimized together with a single optimizer
  optimizer_G = utils.get_optim(
      params,
      itertools.chain(G_AB.parameters(), G_BA.parameters()),
  )
  optimizer_D_A = utils.get_optim(params, D_A)
  optimizer_D_B = utils.get_optim(params, D_B)

  # learning rate schedulers
  lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
      optimizer_G, lr_lambda=utils.LambdaLR(params['epochs'], decay_start_epoch=params['decay_epoch']).step
  )
  lr_scheduler_D_A = torch.optim.lr_scheduler.LambdaLR(
      optimizer_D_A, lr_lambda=utils.LambdaLR(params['epochs'], decay_start_epoch=params['decay_epoch']).step
  )
  lr_scheduler_D_B = torch.optim.lr_scheduler.LambdaLR(
      optimizer_D_B, lr_lambda=utils.LambdaLR(params['epochs'], decay_start_epoch=params['decay_epoch']).step
  )
  # Buffers of previously generated samples
  fake_A_buffer = utils.ReplayBuffer()
  fake_B_buffer = utils.ReplayBuffer()
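  # as in CycleGAN, the discriminators are trained on a buffer of previously generated images, not just the latest batch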

  def sample_images(epochs):
      """Saves a generated sample from the test set"""
      imgs = next(iter(val_data_loader))
      G_AB.eval()
      G_BA.eval()
      with torch.no_grad():
        real_A = imgs["A"].to(device)
        fake_B = G_AB(real_A)
        real_B = imgs["B"].to(device)
        fake_A = G_BA(real_B)
      # Arrange images along the x-axis
      real_A = make_grid(real_A, nrow=5, normalize=True)
      real_B = make_grid(real_B, nrow=5, normalize=True)
      fake_A = make_grid(fake_A, nrow=5, normalize=True)
      fake_B = make_grid(fake_B, nrow=5, normalize=True)
      # Arrange images along the y-axis
      image_grid = torch.cat((real_A, fake_B, real_B, fake_A), 1)
      save_image(image_grid, os.path.join(sample_dir, "fake_images-%s.png" % (epochs)), normalize=False)

  losses_D = []
  losses_G = []
  total_step = len(data_loader)
  for epoch in range(params['epochs']):
    for i, images in enumerate(data_loader):
      real_A = images['A'].to(device)
      real_B = images['B'].to(device)

      b_size = real_A.size(0)

      # TODO: can the requires_grad / G.train() bookkeeping be automated?
      real_labels = torch.ones((b_size, 1, 16, 16)).to(device)
      fake_labels = torch.zeros((b_size, 1, 16, 16)).to(device)

      # Train Generator
      optimizer_G.zero_grad()

      # GAN loss
      fake_B = G_AB(real_A)
      loss_GAN_AB = criterion_GAN(D_B(fake_B), real_labels)
      fake_A = G_BA(real_B)
      loss_GAN_BA = criterion_GAN(D_A(fake_A), real_labels)
      loss_GAN = (loss_GAN_AB + loss_GAN_BA) / 2

      # Cycle loss
      recov_A = G_BA(fake_B)
      loss_cycle_A = criterion_cycle(recov_A, real_A)
      recov_B = G_AB(fake_A)
      loss_cycle_B = criterion_cycle(recov_B, real_B)

      loss_cycle = (loss_cycle_A + loss_cycle_B) / 2

      # Total loss
      loss_G = loss_GAN + params['lambda_cyc'] * loss_cycle

      loss_G.backward()
      optimizer_G.step()

      # Train discriminator A
      optimizer_D_A.zero_grad()

      loss_real_A = criterion_GAN(D_A(real_A), real_labels)
      fake_A_ = fake_A_buffer.push_and_pop(fake_A)
      loss_fake_A = criterion_GAN(D_A(fake_A_.detach()), fake_labels)
      loss_D_A = (loss_real_A + loss_fake_A) / 2
      loss_D_A.backward()
      optimizer_D_A.step()

      # Train discriminator B
      optimizer_D_B.zero_grad()

      loss_real_B = criterion_GAN(D_B(real_B), real_labels)
      fake_B_ = fake_B_buffer.push_and_pop(fake_B)
      loss_fake_B = criterion_GAN(D_B(fake_B_.detach()), fake_labels)
      loss_D_B = (loss_real_B + loss_fake_B) / 2
      loss_D_B.backward()
      optimizer_D_B.step()

      loss_D = (loss_D_A + loss_D_B) / 2


      print('Epoch [{}/{}], step [{}/{}], loss_D: {:.4f}, loss_G: {:.4f}, loss_D_A real/fake: {:.2f}/{:.2f}, loss_D_B real/fake: {:.2f}/{:.2f}'
            .format(epoch, params['epochs'], i + 1, total_step, loss_D.item(), loss_G.item(),
                    loss_real_A.item(), loss_fake_A.item(),
                    loss_real_B.item(), loss_fake_B.item()))

      losses_G.append(loss_G.item())
      losses_D.append(loss_D.item())

    if epoch % params['checkpoint_interval'] == 0:
      torch.save(G_AB.state_dict(), os.path.join(weights_dir, 'G_AB.ckpt'))
      torch.save(G_BA.state_dict(), os.path.join(weights_dir, 'G_BA.ckpt'))
      torch.save(D_A.state_dict(), os.path.join(weights_dir, 'D_A.ckpt'))
      torch.save(D_B.state_dict(), os.path.join(weights_dir, 'D_B.ckpt'))

    # if (epoch + 1) == 1:
    #   save_image(utils.denorm(images), os.path.join(
    #       sample_dir, 'real_images.png'))
    sample_images(epoch + 1)


  plt.figure(figsize=(10, 5))
  plt.title("Generator and Discriminator Loss During Training")
  plt.plot(losses_G, label="Generator")
  plt.plot(losses_D, label="Discriminator")
  plt.xlabel("iterations")
  plt.ylabel("Loss")
  plt.legend()
  plt.savefig(os.path.join(sample_dir, 'loss.png'))
Example No. 8
def job(tuning, params_path, devices, resume, save_interval):
    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    exp_path = os.path.join(dataset_connector.result_dir,
                            f'{params["ex_name"]}/')
    os.environ['CUDA_VISIBLE_DEVICES'] = devices
    print("CUDA Available:", torch.cuda.is_available(),
          "CUDA_VISIBLE_DEVICES:", os.environ['CUDA_VISIBLE_DEVICES'])
    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )

    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['train_data'],
        use_clean_version=True,
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='S2',
        test_size=0.1,
        num_workers=os.cpu_count() * 2)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = utils.get_optim(params, model)

    # TODO: Missing initial weight file.
    # sdict = torch.load(ROOT + params['base_ckpt_path'])['state_dict']
    # del sdict['final.weight']  # remove fully-connected layer
    # model.load_state_dict(sdict, strict=False)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=3e-6)
    start_epoch, end_epoch = (0, params['epochs'] - params['scaleup_epochs'])
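    # the last `scaleup_epochs` epochs are left out here; they appear to be reserved for a separate scale-up run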

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, end_epoch):
        logger.info(f'Epoch {epoch}/{end_epoch}')

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None,
                                 ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))
            logger.info("Training Loss:{},Accuracy(Prec1):{}".format(
                loss.item(), acc))
            if i % 100 == 99:
                logger.info(
                    f'{epoch + i / len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}'
                )

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == end_epoch or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    if isinstance(model, nn.DataParallel):
        model = model.module
    datasets = ('oxford5k', 'paris6k', 'roxford5k', 'rparis6k')
    results = eval_datasets(model,
                            datasets=datasets,
                            ms=True,
                            tta_gem_p=1.0,
                            logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            if d in ('oxford5k', 'paris6k'):
                tuning_result[d] = results[d]
            else:
                for key in ['mapE', 'mapM', 'mapH']:
                    mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                    tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example No. 9
def train(dataset, data):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    sample_dir = os.path.join('samples', dataset)
    weights_dir = os.path.join('weights', dataset)
    os.makedirs(sample_dir, exist_ok=True)
    os.makedirs(weights_dir, exist_ok=True)

    if dataset == 'mnist':
        dataset = torchvision.datasets.MNIST(
            root=data,
            download=True,
            train=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(  # [0,1] -> [-1, 1]
                    (0.5, ), (0.5, )),
            ]))
        params['nc'] = 1

    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=params['batch_size'],
                                              shuffle=True)

    D = models.Discriminator(params['ndf'], params['image_size'],
                             params['labels'])
    G = models.Generator(params['nz'], params['ngf'], params['image_size'],
                         params['labels'])

    D = D.to(device)
    G = G.to(device)

    criterion = nn.BCELoss()
    d_optimizer = utils.get_optim(params, D)
    g_optimizer = utils.get_optim(params, G)

    d_losses = []
    g_losses = []
    total_step = len(data_loader)
    for epoch in range(params['epochs']):
        for i, (images, labels) in enumerate(data_loader):
            b_size = images.size(0)
            images = images.reshape(b_size, -1).to(device)
            labels = labels.to(device)

            real_labels = torch.ones(b_size).to(device)
            fake_labels = torch.zeros(b_size).to(device)
            random_labels = torch.LongTensor(np.random.randint(
                0, 10, b_size)).to(device)
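            # random digit labels (0-9) used to condition the generated samples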

            # Train discriminator
            outputs = D(images, labels)
            d_loss_real = criterion(outputs, real_labels)
            real_score = outputs

            z = torch.randn(b_size, params['nz']).to(device)
            fake_images = G(z, random_labels)
            outputs = D(fake_images.detach(), random_labels)
            d_loss_fake = criterion(outputs, fake_labels)
            fake_score = outputs

            d_loss = d_loss_real + d_loss_fake
            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()

            # Train generator
            # redraw the random labels for the generator update
            random_labels = torch.LongTensor(np.random.randint(
                0, 10, b_size)).to(device)
            fake_images = G(z, random_labels)
            outputs = D(fake_images, random_labels)

            g_loss = criterion(outputs, real_labels)

            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

            print(
                'Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}'
                .format(
                    epoch, params['epochs'], i + 1, total_step, d_loss.item(),
                    g_loss.item(),
                    real_score.mean().item(),
                    fake_score.mean().item()))  # .item(): pull the Python number out of a zero-dim tensor

            g_losses.append(g_loss.item())
            d_losses.append(d_loss.item())

        if (epoch + 1) == 1:
            images = images.reshape(b_size, 1, 28, 28)
            save_image(utils.denorm(images),
                       os.path.join(sample_dir, 'real_images.png'))
        fake_images = fake_images.reshape(b_size, 1, 28, 28)
        save_image(
            utils.denorm(fake_images),
            os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch + 1)))

    torch.save(G.state_dict(), os.path.join(weights_dir, 'G.ckpt'))
    torch.save(D.state_dict(), os.path.join(weights_dir, 'D.ckpt'))

    plt.figure(figsize=(10, 5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(g_losses, label="Generator")
    plt.plot(d_losses, label="Discriminator")
    plt.xlabel("iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(os.path.join(sample_dir, 'loss.png'))
Example No. 10
def job(tuning, params_path, devices, resume):
    """
    Example:
        python exp0.py job --devices 0,1 -s
        python exp0.py tuning --devices 0,1 --n-gpu 1 --mode 'random' --n-iter 4
    """

    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    train_df = pd.read_csv(ROOT + 'data/train.csv')
    train_df, val_df = train_test_split(train_df,
                                        test_size=1024,
                                        random_state=params['seed'])

    model = models.UNet(in_channels=3,
                        n_classes=2,
                        depth=4,
                        ch_first=32,
                        padding=True,
                        batch_norm=False,
                        up_mode='upconv').cuda()

    optimizer = utils.get_optim(model, params)

    if resume is not None:
        model, optimizer = utils.load_checkpoint(model,
                                                 resume,
                                                 optimizer=optimizer)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'val':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }
    image_datasets = {
        'train': data_utils.CSVDataset(train_df, data_transforms['train']),
        'val': data_utils.CSVDataset(val_df, data_transforms['val'])
    }
    data_loaders = {
        'train':
        DataLoader(image_datasets['train'],
                   batch_size=params['batch_size'],
                   pin_memory=True,
                   shuffle=True,
                   drop_last=True,
                   num_workers=params['workers']),
        'val':
        DataLoader(image_datasets['val'],
                   batch_size=params['test_batch_size'],
                   pin_memory=True,
                   shuffle=False,
                   num_workers=params['workers'])
    }

    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(params['epochs'] * 0.7),
                    int(params['epochs'] * 0.9)],
        gamma=0.1)
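    # decay the learning rate by 10x at 70% and 90% of the total epochs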

    for epoch in range(params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}'
        )

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (x, y) in tqdm(enumerate(data_loaders['train']),
                              total=len(data_loaders['train']),
                              miniters=50):
            x = x.to('cuda:0')
            y = y.to('cuda:0', non_blocking=True)

            outputs = model(x)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = utils.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc.item(), x.size(0))

        train_loss = losses.avg
        train_acc = prec1.avg

        # ============================== validation ============================== #
        model.train(False)
        losses.reset()
        prec1.reset()

        for i, (x, y) in tqdm(enumerate(data_loaders['val']),
                              total=len(data_loaders['val'])):
            x = x.cuda()
            y = y.cuda(non_blocking=True)

            with torch.no_grad():
                outputs = model(x)
                loss = criterion(outputs, y)

            acc = utils.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc.item(), x.size(0))

        val_loss = losses.avg
        val_acc = prec1.avg

        logger.info(f'[Val] Loss: \033[1m{val_loss:.4f}\033[0m | '
                    f'Acc: \033[1m{val_acc:.4f}\033[0m\n')

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalars('Loss', {'val': val_loss}, epoch)
        writer.add_scalars('Acc', {'val': val_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        scheduler.step()

        if not tuning:
            utils.save_checkpoint(model, epoch, exp_path + 'model_optim.pth',
                                  optimizer)

    if tuning:
        tuning_result = {}
        for key in ['train_loss', 'train_acc', 'val_loss', 'val_acc']:
            tuning_result[key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example No. 11
    logger.info("Model parameters:")
    params = list(model.named_parameters())
    sum_param = 0
    for name, param in params:
        if param.requires_grad:
            logger.info("{}: {}".format(name, param.shape))
            sum_param += param.numel()
    logger.info("# Parameters: {}.".format(sum_param))
    masker.to("cuda" if torch.cuda.is_available() else "cpu")
    Trainer = get_trainer_cls(args)

    if not args.evaluate:
        logger.info("========== Training Model ==========")
        base_params = filter(lambda p: p.requires_grad, model.parameters())
        opt = utils.get_optim(args.optim, base_params)
        logger.info(opt)
        trainer = Trainer(masker, task_lst, vocabs, opt, args)

        trainer.train(args.epochs)

        logger.info("========== Testing Model ==========")
        trainer.model = utils.load_model(
            model, os.path.join(args.save_path, "best.th"))
        test_loss, test_acc = trainer._eval_epoch(dev=False)
        logger.info(args.exp_name)
        for acc in test_acc.items():
            logger.info(acc)

    else:
        logger.info("========== Evaluating Model ==========")
def job(tuning, params_path, devices, resume, save_interval):
    global params
    for loss_input in [
            "Softmax", "arcface", "cosface", "AdditiveMarginSoftmaxLoss"
    ]:
        params["loss"] = loss_input
        if tuning:
            with open(params_path, 'r') as f:
                params = json.load(f)
            mode_str = 'tuning'
            setting = '_'.join(f'{tp}-{params[tp]}'
                               for tp in params['tuning_params'])
        else:
            mode_str = 'train'
            setting = ''

        exp_path = os.path.join(dataset_connector.result_dir,
                                f'{params["ex_name"]}/', f'{params["loss"]}/')
        os.environ['CUDA_VISIBLE_DEVICES'] = devices
        print("CUDA Available:", torch.cuda.is_available(),
              "CUDA_VISIBLE_DEVICES:", os.environ['CUDA_VISIBLE_DEVICES'])
        logger, writer = utils.get_logger(
            log_dir=exp_path + f'{mode_str}/log/{setting}',
            tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

        if params['augmentation'] == 'soft':
            params['scale_limit'] = 0.2
            params['brightness_limit'] = 0.1
        elif params['augmentation'] == 'middle':
            params['scale_limit'] = 0.3
            params['shear_limit'] = 4
            params['brightness_limit'] = 0.1
            params['contrast_limit'] = 0.1
        else:
            raise ValueError

        train_transform, eval_transform = data_utils.build_transforms(
            scale_limit=params['scale_limit'],
            shear_limit=params['shear_limit'],
            brightness_limit=params['brightness_limit'],
            contrast_limit=params['contrast_limit'],
        )

        data_loaders = data_utils.make_train_loaders(
            params=params,
            data_root=ROOT + 'input/' + params['train_data'],
            use_clean_version=True,
            train_transform=train_transform,
            eval_transform=eval_transform,
            scale='S2',
            test_size=0.1,
            num_workers=os.cpu_count() * 2)

        model = models.LandmarkNet(
            n_classes=params['class_topk'],
            model_name=params['model_name'],
            pooling=params['pooling'],
            loss_module=params['loss'],
            s=params['s'],
            margin=params['margin'],
            theta_zero=params['theta_zero'],
            use_fc=params['use_fc'],
            fc_dim=params['fc_dim'],
        ).cuda()

        criterion = nn.CrossEntropyLoss()
        optimizer = utils.get_optim(params, model)

        state_dict = torch.load(
            os.path.join("../pretrained_model",
                         "Epoch14_" + loss_input + ".pth"))
        clean_state_dict = OrderedDict()
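        # the checkpoint was presumably saved from an nn.DataParallel model; k[7:] strips the leading 'module.' prefix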
        for k, v in state_dict.items():
            name = k[7:]
            clean_state_dict[name] = v

        # sdict = torch.load(resume)['state_dict']
        # del sdict['final.weight']  # remove fully-connected layer
        # model.load_state_dict(sdict, strict=False)
        # model.backbone.requires_grad = False
        # model.bn.requires_grad = False
        # model.dropout.requires_grad = False
        # model.fc.requires_grad = False
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=params['epochs'] * len(data_loaders['train']),
            eta_min=3e-6)
        start_epoch, end_epoch = (15,
                                  params['epochs'] - params['scaleup_epochs'])

        if len(devices.split(',')) > 1:
            model = nn.DataParallel(model)

        for epoch in range(start_epoch, end_epoch):
            logger.info(f'Epoch {epoch}/{end_epoch}')

            # ============================== train ============================== #
            model.train(True)

            losses = utils.AverageMeter()
            prec1 = utils.AverageMeter()

            for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                     total=len(data_loaders['train']),
                                     miniters=None,
                                     ncols=55):
                x = x.to('cuda')
                y = y.to('cuda')

                if params["loss"] in ["AdditiveMarginSoftmaxLoss"]:
                    outputs, loss = model(x, y)
                    loss = loss.mean()
                elif params["loss"] in ["LSoftmax", "arcface", "cosface"]:
                    outputs = model(x, y)
                    loss = criterion(outputs, y)
                elif params["loss"] in ["Softmax"]:
                    outputs = model(x)
                    loss = criterion(outputs, y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                acc = metrics.accuracy(outputs, y)
                losses.update(loss.item(), x.size(0))
                prec1.update(acc, x.size(0))
                logger.info("Training Loss:{},Accuracy(Prec1):{}".format(
                    loss.item(), acc))
                if i % 100 == 99:
                    logger.info(
                        f'{epoch + i / len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}'
                    )

            train_loss = losses.avg
            train_acc = prec1.avg

            writer.add_scalars('Loss', {'train': train_loss}, epoch)
            writer.add_scalars('Acc', {'train': train_acc}, epoch)
            writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

            if (epoch + 1) == end_epoch or (epoch + 1) % save_interval == 0:
                output_file_name = exp_path + f'Epoch{epoch}_' + str(
                    params["loss"]) + setting + '.pth'
                print("Model Saved:{}".format(output_file_name))
                utils.save_checkpoint(path=output_file_name,
                                      model=model,
                                      epoch=epoch,
                                      optimizer=optimizer,
                                      params=params)