def main():
    global BEST_LOSS
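    # Let cuDNN auto-tune conv algorithms; pays off since the crop size is fixed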
    cudnn.benchmark = True
    start_epoch = cfg.OPOSE.start_epoch  # start from epoch 0 or last checkpoint epoch
    # Create ckpt & vis folder
    if not os.path.isdir(cfg.OPOSE.ckpt):
        os.makedirs(cfg.OPOSE.ckpt)
    if not os.path.exists(os.path.join(cfg.OPOSE.ckpt, 'vis')):
        os.makedirs(os.path.join(cfg.OPOSE.ckpt, 'vis'))
    if args.cfg_file is not None and not cfg.OPOSE.evaluate:
        shutil.copyfile(
            args.cfg_file,
            os.path.join(cfg.OPOSE.ckpt,
                         args.cfg_file.split('/')[-1]))
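    # 19 heatmap channels (18 COCO keypoints + background) and 19 limb vector
    # fields, matching the OpenPose COCO configuration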
    model = pose_estimation.PoseModel(num_point=19,
                                      num_vector=19,
                                      pretrained=True)
    # # Calculate FLOPs & Param
    # n_flops, n_convops, n_params = measure_model(model, cfg.OPOSE.input_size, cfg.OPOSE.input_size)
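    # Plain L2 regression against the ground-truth heatmaps and vector fields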
    criterion = nn.MSELoss().cuda()
    # Dataset and Loader
    train_dataset = dataset.CocoOpenposeData(
        cfg,
        cfg.OPOSE.data_root,
        cfg.OPOSE.info_root,
        'train2017',
        transformer=transforms.Compose([
            transforms.RandomResized(),
            transforms.RandomRotate(40),
            transforms.RandomCrop(368),
            transforms.RandomHorizontalFlip(),
        ]))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=int(cfg.workers),
                                               pin_memory=True)
    if cfg.OPOSE.validate or cfg.OPOSE.evaluate:
        val_dataset = dataset.CocoOpenposeData(
            cfg,
            cfg.OPOSE.data_root,
            cfg.OPOSE.info_root,
            'val2017',
            transformer=transforms.Compose(
                [transforms.TestResized(cfg.OPOSE.input_size)]))
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 num_workers=int(cfg.workers),
                                                 pin_memory=True)
    # Load nets into gpu
    if NUM_GPUS > 1:
        model = torch.nn.DataParallel(model, device_ids=gpu).cuda()
    else:
        model = model.cuda()  # single-GPU runs still need the model on device
    # Set up optimizers
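    # get_parameters presumably splits the weights into groups and returns
    # their LR multipliers; only the groups are used by the plain SGD below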
    params, multiple = get_parameters(model, cfg, False)

    optimizer = torch.optim.SGD(params,
                                cfg.OPOSE.base_lr,
                                momentum=cfg.OPOSE.momentum,
                                weight_decay=cfg.OPOSE.weight_decay)

    # Resume training
    title = 'Pytorch-OPOSE-{}-{}'.format(cfg.OPOSE.arch_encoder,
                                         cfg.OPOSE.arch_decoder)
    if cfg.OPOSE.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.OPOSE.resume))
        assert os.path.isfile(
            cfg.OPOSE.resume), 'Error: no checkpoint file found!'
        ckpt = torch.load(cfg.OPOSE.resume)
        BEST_LOSS = ckpt['best_loss']
        start_epoch = ckpt['epoch']
        try:
            model.module.load_state_dict(ckpt['state_dict'])
        except AttributeError:  # model was not wrapped in DataParallel
            model.load_state_dict(ckpt['state_dict'])

        optimizer.load_state_dict(ckpt['optimizer'])
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'), title=title)
        logger.set_names(
            ['Epoch', 'Learning Rate', 'Train Loss', 'Valid Loss'])

    # Train and val
    for epoch in range(start_epoch, cfg.OPOSE.epochs):
        # Report the LR actually in use (adjust_learning_rate mutates it
        # below) rather than the constant cfg.OPOSE.base_lr
        cur_lr = optimizer.param_groups[0]['lr']
        print('\nEpoch: [{}/{}] | LR: {:.8f}'.format(epoch + 1,
                                                     cfg.OPOSE.epochs,
                                                     cur_lr))
        train_loss = train(train_loader, model, criterion, optimizer, epoch,
                           USE_CUDA)
        if cfg.OPOSE.validate:
            test_loss = test(val_loader, model, criterion, optimizer, epoch,
                             USE_CUDA)
        else:
            test_loss = 0.0

        # Append logger file
        logger.append([epoch, cur_lr, train_loss, test_loss])
        # Save model
        save_checkpoint(model, optimizer, test_loss, epoch)
        # Adjust learning rate
        adjust_learning_rate(optimizer, epoch)
        # Draw curve
        try:
            draw_curve('model', cfg.OPOSE.ckpt)
            print('==> Log curve saved.')
        except Exception:
            print('==> Failed to save log curve.')

    logger.close()
    try:
        savefig(os.path.join(cfg.OPOSE.ckpt, 'log.eps'))
        shutil.copyfile(
            os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
            os.path.join(
                cfg.OPOSE.ckpt, 'log{}.txt'.format(
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('==> Failed to copy log file.')
    print('==> Training Done!')
    print('==> Best loss: {:.4f}'.format(BEST_LOSS))


# Example #2: image-classification training
def main():
    global BEST_ACC, LR_STATE
    start_epoch = cfg.CLS.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Create ckpt folder
    if not os.path.isdir(cfg.CLS.ckpt):
        mkdir_p(cfg.CLS.ckpt)
    if args.cfg_file is not None and not cfg.CLS.evaluate:
        shutil.copyfile(
            args.cfg_file,
            os.path.join(cfg.CLS.ckpt,
                         args.cfg_file.split('/')[-1]))

    # Dataset and Loader
    normalize = transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    train_aug = [
        transforms.RandomResizedCrop(cfg.CLS.crop_size),
        transforms.RandomHorizontalFlip()
    ]
    if len(cfg.CLS.rotation) > 0:
        train_aug.append(transforms.RandomRotation(cfg.CLS.rotation))
    if len(cfg.CLS.pixel_jitter) > 0:
        train_aug.append(RandomPixelJitter(cfg.CLS.pixel_jitter))
    if cfg.CLS.grayscale > 0:
        train_aug.append(transforms.RandomGrayscale(cfg.CLS.grayscale))
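    # ToTensor and normalize go last: the augmentations above operate on PIL images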
    train_aug.append(transforms.ToTensor())
    train_aug.append(normalize)

    traindir = os.path.join(cfg.CLS.data_root, cfg.CLS.train_folder)
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose(train_aug)),
        batch_size=cfg.CLS.train_batch,
        shuffle=True,
        num_workers=cfg.workers,
        pin_memory=True)

    if cfg.CLS.validate or cfg.CLS.evaluate:
        valdir = os.path.join(cfg.CLS.data_root, cfg.CLS.val_folder)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(
                valdir,
                transforms.Compose([
                    transforms.Resize(cfg.CLS.base_size),
                    transforms.CenterCrop(cfg.CLS.crop_size),
                    transforms.ToTensor(),
                    normalize,
                ])),
            batch_size=cfg.CLS.test_batch,
            shuffle=False,
            num_workers=cfg.workers,
            pin_memory=True)

    # Create model
    model = models.__dict__[cfg.CLS.arch]()
    print(model)
    # Calculate FLOPs & Param
    n_flops, n_convops, n_params = measure_model(model, cfg.CLS.crop_size,
                                                 cfg.CLS.crop_size)
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.format(
        n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
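    # measure_model can modify the network in place, so discard this instance
    # and rebuild a fresh one for training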
    del model
    model = models.__dict__[cfg.CLS.arch]()

    # Load pre-train model
    if cfg.CLS.pretrained:
        print("==> Using pre-trained model '{}'".format(cfg.CLS.pretrained))
        pretrained_dict = torch.load(cfg.CLS.pretrained)
        # Checkpoints may wrap the weights in a 'state_dict' key
        if 'state_dict' in pretrained_dict:
            pretrained_dict = pretrained_dict['state_dict']
        model_dict = model.state_dict()
        # weight_filler presumably keeps the layers whose names and shapes match
        updated_dict, match_layers, mismatch_layers = weight_filler(
            pretrained_dict, model_dict)
        model_dict.update(updated_dict)
        model.load_state_dict(model_dict)
    else:
        print("==> Creating model '{}'".format(cfg.CLS.arch))

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if cfg.CLS.pretrained:

        # Freshly initialised (mismatched) layers train at 10x the base LR
        new_params = [
            p for name, p in model.named_parameters()
            if name in mismatch_layers
        ]
        base_params = [
            p for name, p in model.named_parameters() if name in match_layers
        ]
        model_params = [{
            'params': base_params
        }, {
            'params': new_params,
            'lr': cfg.CLS.base_lr * 10
        }]
    else:
        model_params = model.parameters()
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    optimizer = optim.SGD(model_params,
                          lr=cfg.CLS.base_lr,
                          momentum=cfg.CLS.momentum,
                          weight_decay=cfg.CLS.weight_decay)

    # Evaluate model
    if cfg.CLS.evaluate:
        print('\n==> Evaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion,
                                               start_epoch, USE_CUDA)
        print(
            '==> Test Loss: {:.8f} | Test_top1: {:.4f}% | Test_top5: {:.4f}%'.
            format(test_loss, test_top1, test_top5))
        return

    # Resume training
    title = 'Pytorch-CLS-' + cfg.CLS.arch
    if cfg.CLS.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.CLS.resume))
        assert os.path.isfile(
            cfg.CLS.resume), 'Error: no checkpoint file found!'
        checkpoint = torch.load(cfg.CLS.resume)
        BEST_ACC = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    # Train and val
    for epoch in range(start_epoch, cfg.CLS.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f}'.format(epoch + 1, cfg.CLS.epochs,
                                                     LR_STATE))

        train_loss, train_acc = mixup_train(train_loader, model, criterion,
                                            optimizer, epoch, USE_CUDA)
        if cfg.CLS.validate:
            test_loss, test_top1, test_top5 = test(val_loader, model,
                                                   criterion, epoch, USE_CUDA)
        else:
            test_loss, test_top1, test_top5 = 0.0, 0.0, 0.0

        # Append logger file
        logger.append([LR_STATE, train_loss, test_loss, train_acc, test_top1])
        # Save model
        save_checkpoint(model, optimizer, test_top1, epoch)
        # Draw curve
        try:
            draw_curve(cfg.CLS.arch, cfg.CLS.ckpt)
            print('==> Log curve saved.')
        except Exception:
            print('==> Failed to save log curve.')

    logger.close()
    try:
        savefig(os.path.join(cfg.CLS.ckpt, 'log.eps'))
        shutil.copyfile(
            os.path.join(cfg.CLS.ckpt, 'log.txt'),
            os.path.join(
                cfg.CLS.ckpt, 'log{}.txt'.format(
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('==> Failed to copy log file.')
    print('==> Training Done!')
    print('==> Best acc: {:.4f}%'.format(BEST_ACC))
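

# save_checkpoint and adjust_learning_rate are called in both examples but not
# shown. Below is a minimal sketch of save_checkpoint, inferred from the keys
# the resume branches read ('epoch', 'state_dict', 'optimizer', 'best_acc');
# the function body, file names, and the "higher is better" comparison are
# assumptions, not the original implementation.
def save_checkpoint_sketch(model, optimizer, metric, epoch):
    global BEST_ACC
    is_best = metric > BEST_ACC
    BEST_ACC = max(metric, BEST_ACC)
    state = {
        'epoch': epoch + 1,  # resume then continues from the next epoch
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'best_acc': BEST_ACC,  # read back as checkpoint['best_acc'] on resume
    }
    filepath = os.path.join(cfg.CLS.ckpt, 'checkpoint.pth.tar')  # hypothetical path
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath,
                        os.path.join(cfg.CLS.ckpt, 'model_best.pth.tar'))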