Exemplo n.º 1
0
def main(args):
    """Train a PFLD backbone together with its auxiliary network.

    Sets up logging, builds the model pair selected by ``args.backbone``,
    optionally restores weights from ``args.resume``, then runs one
    train/validate cycle per epoch for ``args.start_epoch`` through
    ``args.end_epoch`` (inclusive).  A checkpoint is handed to
    ``save_checkpoint`` every ``args.epoch_interval`` epochs and whenever the
    validation NME improves.  The per-epoch losses are logged as
    space-separated lists once the loop finishes.

    Args:
        args: Parsed options.  Attributes read here: ``log_file``,
            ``backbone``, ``resume``, ``start_epoch`` (overwritten when a
            checkpoint is restored), ``end_epoch``, ``base_lr``,
            ``weight_decay``, ``lr_patience``, ``dataroot``,
            ``val_dataroot``, ``img_size``, ``train_batchsize``,
            ``val_batchsize``, ``workers``, ``epoch_interval`` and
            ``snapshot``.

    Raises:
        ValueError: If ``args.backbone`` is not one of ``"v2"``, ``"v3"``,
            ``"ghost"`` or ``"lite"``.
    """
    # Step 1: parse args config
    logging.basicConfig(
        format=
        '[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode='w'),
            logging.StreamHandler()
        ])
    print_args(args)

    # Every backbone module exposes the same two class names, so the rest of
    # the function does not depend on which one was picked.
    if args.backbone == "v2":
        from models.pfld import PFLDInference, AuxiliaryNet
    elif args.backbone == "v3":
        from models.mobilev3_pfld import PFLDInference, AuxiliaryNet
    elif args.backbone == "ghost":
        from models.ghost_pfld import PFLDInference, AuxiliaryNet
    elif args.backbone == "lite":
        from models.lite import PFLDInference, AuxiliaryNet
    else:
        raise ValueError("backbone is not implemented")
    plfd_backbone = PFLDInference()
    auxiliarynet = AuxiliaryNet()

    # Guard on truthiness first: os.path.exists(None) raises TypeError.
    resume = args.resume
    if resume and os.path.exists(resume) and resume.endswith('.pth'):
        logging.info("loading the checkpoint from {}".format(resume))
        # Load onto the CPU; the modules are moved to `device` below.
        check = torch.load(resume, map_location=torch.device('cpu'))
        plfd_backbone.load_state_dict(check["plfd_backbone"])
        auxiliarynet.load_state_dict(check["auxiliarynet"])
        # NOTE(review): checkpoints store the epoch that was just trained, so
        # resuming here runs that epoch again - confirm this is intended.
        args.start_epoch = check["epoch"]
    elif resume:
        # A resume path was given but cannot be used; say so instead of
        # silently training from scratch.
        logging.warning(
            "checkpoint %s does not exist or is not a .pth file; "
            "starting without restored weights", resume)

    # Step 2: model, criterion, optimizer, scheduler
    plfd_backbone = plfd_backbone.to(device)
    auxiliarynet = auxiliarynet.to(device)
    criterion = LandMarkLoss()
    optimizer = torch.optim.Adam([{
        'params': plfd_backbone.parameters()
    }, {
        'params': auxiliarynet.parameters()
    }],
                                 lr=args.base_lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # step 3: data
    # augmentation
    transform = transforms.Compose([transforms.ToTensor()])
    img_root = os.path.realpath('./data')
    wlfwdataset = PFLDDatasets(args.dataroot,
                               transform,
                               img_root=img_root,
                               img_size=args.img_size)
    dataloader = DataLoader(wlfwdataset,
                            batch_size=args.train_batchsize,
                            shuffle=True,
                            num_workers=args.workers,
                            drop_last=False)

    wlfw_val_dataset = PFLDDatasets(args.val_dataroot,
                                    transform,
                                    img_root=img_root,
                                    img_size=args.img_size)
    wlfw_val_dataloader = DataLoader(wlfw_val_dataset,
                                     batch_size=args.val_batchsize,
                                     shuffle=False,
                                     num_workers=args.workers)

    def checkpoint_state(epoch):
        """Return the dict handed to ``save_checkpoint`` for *epoch*."""
        return {
            'epoch': epoch,
            'plfd_backbone': plfd_backbone.state_dict(),
            'auxiliarynet': auxiliarynet.state_dict()
        }

    # step 4: run
    weighted_losses = []
    train_losses = []
    val_losses = []
    # Start at +inf so the first finite NME always counts as an improvement.
    val_nme = float("inf")
    for epoch in range(args.start_epoch, args.end_epoch + 1):
        weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                auxiliarynet, criterion,
                                                optimizer, epoch)

        # Periodic snapshot, taken before validation.
        if epoch % args.epoch_interval == 0:
            filename = os.path.join(str(args.snapshot),
                                    "checkpoint_epoch_" + str(epoch) + '.pth')
            save_checkpoint(checkpoint_state(epoch), filename)

        val_loss, cur_val_nme = validate(wlfw_val_dataloader, plfd_backbone,
                                         auxiliarynet, criterion)
        # Keep a separate checkpoint for the best validation NME seen so far.
        if cur_val_nme < val_nme:
            filename = os.path.join(str(args.snapshot),
                                    "checkpoint_min_nme.pth")
            save_checkpoint(checkpoint_state(epoch), filename)
            val_nme = cur_val_nme
        scheduler.step(val_loss)

        weighted_losses.append(weighted_train_loss.item())
        train_losses.append(train_loss.item())
        val_losses.append(val_loss.item())
        logging.info(
            "epoch: {}, weighted_train_loss: {:.4f}, trainset loss: {:.4f}  valset loss: {:.4f}  best val "
            "nme: {:.4f}\n ".format(epoch, weighted_train_loss, train_loss,
                                    val_loss, val_nme))

    # Dump the full loss histories, one space-separated line each.
    logging.info(" ".join(map(str, weighted_losses)))
    logging.info(" ".join(map(str, train_losses)))
    logging.info(" ".join(map(str, val_losses)))
Exemplo n.º 2
0
def main(args):
    """Train PFLD and its auxiliary network on CUDA with TensorBoard logging.

    Sets up logging, builds both networks on the GPU, then for every epoch
    from ``args.start_epoch`` through ``args.end_epoch`` (inclusive) runs
    ``train``, hands a checkpoint to ``save_checkpoint``, runs ``validate``
    and steps the plateau scheduler on the validation loss.  Losses are
    written to a ``SummaryWriter`` rooted at ``args.tensorboard``; the writer
    is closed even if an epoch raises.

    Args:
        args: Parsed options.  Attributes read here: ``log_file``,
            ``base_lr``, ``weight_decay``, ``lr_patience``, ``dataroot``,
            ``val_dataroot``, ``train_batchsize``, ``val_batchsize``,
            ``workers``, ``tensorboard``, ``start_epoch``, ``end_epoch`` and
            ``snapshot``.
    """
    # Step 1: parse args config
    logging.basicConfig(
        format=
        '[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode='w'),
            logging.StreamHandler()
        ])
    print_args(args)

    # Step 2: model, criterion, optimizer, scheduler
    plfd_backbone = PFLDInference().cuda()
    auxiliarynet = AuxiliaryNet().cuda()
    criterion = PFLDLoss()
    # One optimizer drives both networks, each as its own parameter group.
    optimizer = torch.optim.Adam([{
        'params': plfd_backbone.parameters()
    }, {
        'params': auxiliarynet.parameters()
    }],
                                 lr=args.base_lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # step 3: data
    # augmentation
    transform = transforms.Compose([transforms.ToTensor()])
    wlfwdataset = WLFWDatasets(args.dataroot, transform)
    dataloader = DataLoader(wlfwdataset,
                            batch_size=args.train_batchsize,
                            shuffle=True,
                            num_workers=args.workers,
                            drop_last=False)

    wlfw_val_dataset = WLFWDatasets(args.val_dataroot, transform)
    wlfw_val_dataloader = DataLoader(wlfw_val_dataset,
                                     batch_size=args.val_batchsize,
                                     shuffle=False,
                                     num_workers=args.workers)

    # step 4: run
    writer = SummaryWriter(args.tensorboard)
    try:
        for epoch in range(args.start_epoch, args.end_epoch + 1):
            weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                    auxiliarynet, criterion,
                                                    optimizer, epoch)
            # Snapshot after every training epoch, before validation.
            filename = os.path.join(
                str(args.snapshot),
                "checkpoint_epoch_" + str(epoch) + '.pth.tar')
            save_checkpoint(
                {
                    'epoch': epoch,
                    'plfd_backbone': plfd_backbone.state_dict(),
                    'auxiliarynet': auxiliarynet.state_dict()
                }, filename)

            val_loss = validate(wlfw_val_dataloader, plfd_backbone,
                                auxiliarynet, criterion, epoch)

            scheduler.step(val_loss)
            writer.add_scalar('data/weighted_loss', weighted_train_loss,
                              epoch)
            writer.add_scalars('data/loss', {
                'val loss': val_loss,
                'train loss': train_loss
            }, epoch)
    finally:
        # Close the writer even when training or validation raises, so it is
        # not leaked on an interrupted run.
        writer.close()
Exemplo n.º 3
0
def main(args):
    """Train PFLD and its auxiliary network on the IBUG data with resume support.

    Sets up logging, builds both networks on ``device``, optionally restores
    model (and, when present, optimizer) state from ``args.resume_path``,
    then for every epoch from ``args.start_epoch`` through ``args.end_epoch``
    (inclusive) runs ``train``, hands a checkpoint to ``save_checkpoint``,
    runs ``validate`` and steps the plateau scheduler on the validation loss.
    Losses are written to a ``SummaryWriter`` rooted at ``args.tensorboard``;
    the writer is closed even if an epoch raises.

    Args:
        args: Parsed options.  Attributes read here: ``log_file``,
            ``base_lr``, ``weight_decay``, ``lr_patience``, ``resume_path``,
            ``start_epoch`` (overwritten when a checkpoint is restored),
            ``end_epoch``, ``mirror_file``, ``train_json``, ``val_json``,
            ``train_batchsize``, ``val_batchsize``, ``workers``,
            ``tensorboard`` and ``snapshot``.
    """
    # Step 1: parse args config
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.INFO,
                        handlers=[
                            logging.FileHandler(args.log_file, mode='w'),
                            logging.StreamHandler()
                        ])
    print_args(args)

    # Step 2: model, criterion, optimizer, scheduler
    plfd_backbone = PFLDInference().to(device)
    auxiliarynet = AuxiliaryNet().to(device)
    criterion = LandmarkLoss()
    # One optimizer drives both networks, each as its own parameter group.
    optimizer = torch.optim.Adam([{
        'params': plfd_backbone.parameters()
    }, {
        'params': auxiliarynet.parameters()
    }],
                                 lr=args.base_lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    if args.resume_path:
        print('loading checkpoint {}'.format(args.resume_path))
        # Remap stored tensors onto the active device so a checkpoint written
        # on a GPU can still be restored on a host without that GPU.
        checkpoint = torch.load(str(args.resume_path), map_location=device)
        # NOTE(review): checkpoints store the epoch that was just trained, so
        # resuming here runs that epoch again - confirm this is intended.
        args.start_epoch = checkpoint['epoch']
        plfd_backbone.load_state_dict(checkpoint['plfd_backbone'])
        auxiliarynet.load_state_dict(checkpoint['auxiliarynet'])
        # Checkpoints without an 'optimizer' entry are accepted as well.
        if 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])

    # step 3: data
    # augmentation
    train_transform = transforms.Compose([
        AugCrop(output_size=112, is_training=True),
        HorizontalFlip(mirror=args.mirror_file),
        RandomRotate(max_angle=30),
        Affine(max_strength=30, output_size=112),
        ColorDistort()
    ])
    val_transform = transforms.Compose([AugCrop(output_size=112)])
    ibugdataset = IBUGDatasets(args.train_json,
                               transform=train_transform,
                               is_train=True)
    train_dataset_size = ibugdataset.get_dataset_size()
    # Draw `train_dataset_size` samples per epoch, with replacement.
    sampler = RandomSampler(ibugdataset,
                            replacement=True,
                            num_samples=train_dataset_size)
    dataloader = DataLoader(ibugdataset,
                            batch_size=args.train_batchsize,
                            sampler=sampler,
                            num_workers=args.workers,
                            drop_last=False)

    ibug_val_dataset = IBUGDatasets(args.val_json, transform=val_transform)
    val_dataset_size = ibug_val_dataset.get_dataset_size()
    # NOTE(review): validation also samples with replacement, so each epoch
    # evaluates a different random draw - confirm this is intended.
    val_sampler = RandomSampler(ibug_val_dataset,
                                replacement=True,
                                num_samples=val_dataset_size)
    ibug_val_dataloader = DataLoader(ibug_val_dataset,
                                     batch_size=args.val_batchsize,
                                     sampler=val_sampler,
                                     num_workers=args.workers)

    # step 4: run
    writer = SummaryWriter(args.tensorboard)
    try:
        for epoch in range(args.start_epoch, args.end_epoch + 1):
            weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                    auxiliarynet, criterion,
                                                    optimizer, epoch)
            # Snapshot after every training epoch, before validation.
            filename = os.path.join(
                str(args.snapshot),
                "checkpoint_epoch_" + str(epoch) + '.pth.tar')
            save_checkpoint(
                {
                    'epoch': epoch,
                    'plfd_backbone': plfd_backbone.state_dict(),
                    'auxiliarynet': auxiliarynet.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, filename)

            val_loss = validate(ibug_val_dataloader, plfd_backbone,
                                auxiliarynet, criterion, epoch)

            scheduler.step(val_loss)
            writer.add_scalar('data/weighted_loss', weighted_train_loss,
                              epoch)
            writer.add_scalars('data/loss', {
                'val loss': val_loss,
                'train loss': train_loss
            }, epoch)
    finally:
        # Close the writer even when training or validation raises, so it is
        # not leaked on an interrupted run.
        writer.close()