Example #1
def main():
    # [*] args, loggers and tensorboard
    args = parse_args()
    reset_config(config, args)

    logger, _, tb_log_dir = create_logger(config, 'DAG', 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
    }

    # [*] gpus parallel and model prepare
    # prepare the pretrained-model directory -- the weights (e.g. downloaded from Google Drive) are expected under ./pretrain
    if not os.path.exists('./pretrain'):
        os.makedirs('./pretrain')

    if config.DAG.TRAIN.ALIGN:
        print('====> train object-aware version <====')
        model = models.__dict__[config.DAG.TRAIN.MODEL](
            align=True).cuda()  # build model
    else:
        print(
            '====> Default: train without object-aware, also prepare for DAGPlus <===='
        )
        model = models.__dict__[config.DAG.TRAIN.MODEL](
            align=False).cuda()  # build model

    print(model)

    model = load_pretrain(model, './pretrain/{0}'.format(
        config.DAG.TRAIN.PRETRAIN))  # load pretrain

    # get optimizer
    if config.DAG.TRAIN.START_EPOCH != config.DAG.TRAIN.UNFIX_EPOCH:  # UNFIX_EPOCH, e.g. 10
        optimizer, lr_scheduler = build_opt_lr(config, model,
                                               config.DAG.TRAIN.START_EPOCH)
    else:
        optimizer, lr_scheduler = build_opt_lr(config, model, 0)

    # check trainable again
    print('==========double check trainable==========')
    check_trainable(model, logger)  # print trainable params info

    if config.DAG.TRAIN.RESUME and config.DAG.TRAIN.START_EPOCH != 0:  # resume
        model, optimizer, args.start_epoch, arch = restore_from(
            model, optimizer, config.DAG.TRAIN.RESUME)

    # parallel
    gpus = [int(i) for i in config.GPUS.split(',')]
    gpu_num = len(gpus)
    logger.info('GPU NUM: {:2d}'.format(len(gpus)))

    device = torch.device(
        'cuda:{}'.format(gpus[0]) if torch.cuda.is_available() else 'cpu')
    model = torch.nn.DataParallel(model, device_ids=gpus).to(device)

    logger.info(lr_scheduler)
    logger.info('model prepare done')

    # [*] train

    for epoch in range(config.DAG.TRAIN.START_EPOCH,
                       config.DAG.TRAIN.END_EPOCH):
        # build dataloader, benefit to tracking
        train_set = DAGDataset(config)

        train_loader = DataLoader(train_set,
                                  batch_size=config.DAG.TRAIN.BATCH * gpu_num,
                                  num_workers=config.WORKERS,
                                  pin_memory=True,
                                  sampler=None,
                                  drop_last=True)

        # check if it's time to train backbone
        if epoch == config.DAG.TRAIN.UNFIX_EPOCH:
            logger.info('training backbone')
            optimizer, lr_scheduler = build_opt_lr(config, model.module, epoch)
            print('==========double check trainable==========')
            check_trainable(model, logger)  # print trainable params info

        lr_scheduler.step(epoch)
        curLR = lr_scheduler.get_cur_lr()

        model, writer_dict = DAG_train(train_loader,
                                       model,
                                       optimizer,
                                       epoch + 1,
                                       curLR,
                                       config,
                                       writer_dict,
                                       logger,
                                       device=device)
        # save model
        save_model(model,
                   epoch,
                   optimizer,
                   config.DAG.TRAIN.MODEL,
                   config,
                   isbest=False)

    writer_dict['writer'].close()
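
The per-epoch training function DAG_train is not shown here, but the writer_dict built above implies that it logs scalars and advances train_global_steps itself. Below is a minimal sketch of that logging step, assuming a hypothetical scalar loss; it is an illustration of the pattern, not the repository's actual code.

def log_train_step(writer_dict, loss):
    # illustrative only: write one scalar to TensorBoard and advance the global step
    writer = writer_dict['writer']
    step = writer_dict['train_global_steps']
    writer.add_scalar('train_loss', float(loss), step)
    writer_dict['train_global_steps'] = step + 1
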
Example #2
def main():
    # [*] args, loggers and tensorboard
    args = parse_args()
    reset_config(config, args)

    logger, _, tb_log_dir = create_logger(config, 'SIAMRPN', 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    writer_dict = {
        'writer': SummaryWriter(logdir=tb_log_dir),
        'train_global_steps': 0,
    }

    # [*] gpus parallel and model prepare
    # prepare pretrained model -- download from google drive
    if not os.path.exists('./pretrain'):
        os.makedirs('./pretrain')

    # auto-download train model from GoogleDrive
    try:
        DRIVEID = pretrain_zoo()

        if not os.path.exists('./pretrain/{}'.format(config.SIAMRPN.TRAIN.PRETRAIN)):
            os.system(
                'wget --no-check-certificate \'https://drive.google.com/uc?export=download&id={0}\' -O ./pretrain/{1}'
                    .format(DRIVEID[config.SIAMRPN.TRAIN.MODEL], config.SIAMRPN.TRAIN.PRETRAIN))
    except Exception:
        print('auto-download of the pretrained model failed, please download it manually and put it in the ./pretrain directory')


    # define model
    anchor_nums = len(config.SIAMRPN.TRAIN.ANCHORS_RATIOS) * len(config.SIAMRPN.TRAIN.ANCHORS_SCALES)
    model = models.__dict__[config.SIAMRPN.TRAIN.MODEL](anchors_nums=anchor_nums, cls_type=config.SIAMRPN.TRAIN.CLS_TYPE)  # build model
    print(model)
    model = load_pretrain(model, './pretrain/{0}'.format(config.SIAMRPN.TRAIN.PRETRAIN))    # load pretrain
    trainable_params = check_trainable(model, logger)    # print trainable params info
    optimizer = get_optimizer(config, trainable_params)  # optimizer
    lr_scheduler = lr_decay(config, optimizer)  # learning rate decay scheduler

    if config.SIAMRPN.TRAIN.RESUME:  # resume
        logger.info("resume from {}".format(config.SIAMRPN.TRAIN.RESUME))
        assert os.path.isfile(config.SIAMRPN.TRAIN.RESUME), \
            '{} is not a valid file.'.format(config.SIAMRPN.TRAIN.RESUME)

        model, optimizer, config.SIAMRPN.TRAIN.START_EPOCH = \
            restore_from(model, optimizer, config.SIAMRPN.TRAIN.RESUME)
        model.features.unfix((config.SIAMRPN.TRAIN.START_EPOCH - 1) / config.SIAMRPN.TRAIN.END_EPOCH)

    # parallel
    gpus = [int(i) for i in config.GPUS.split(',')]
    gpu_num = len(gpus)
    logger.info('GPU NUM: {:2d}'.format(len(gpus)))
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
    logger.info('model prepare done')

    # [*] train

    for epoch in range(config.SIAMRPN.TRAIN.START_EPOCH, config.SIAMRPN.TRAIN.END_EPOCH):
        # build dataloader, benefit to tracking
        train_set = SiamRPNDataset(config)
        model.module.anchors = train_set.anchorsPairs
        model.module.cfg = config
        train_loader = DataLoader(train_set, batch_size=config.SIAMRPN.TRAIN.BATCH * gpu_num, num_workers=config.WORKERS,
                                  pin_memory=True, sampler=None)

        if config.SIAMRPN.TRAIN.LR_POLICY == 'log':
            curLR = lr_scheduler[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = curLR
        else:
            lr_scheduler.step()
            curLR = get_lr(optimizer)


        model, writer_dict = siamrpn_train(train_loader, model, optimizer, epoch + 1, curLR, config, writer_dict,
                                           logger, cls_type=config.SIAMRPN.TRAIN.CLS_TYPE)

        # save model
        save_model(model, epoch, optimizer, config.SIAMRPN.TRAIN.MODEL, config, isbest=False)


    writer_dict['writer'].close()
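
When LR_POLICY == 'log', the loop above indexes lr_scheduler[epoch] and writes the value into every param group, which suggests that lr_decay returns a per-epoch array of learning rates for this policy. The sketch below shows one way to build such a log-spaced schedule; lr_start, lr_end and num_epochs are hypothetical names, and the repository's lr_decay may differ.

import numpy as np

def log_lr_schedule(lr_start, lr_end, num_epochs):
    # one learning rate per epoch, decayed geometrically from lr_start to lr_end
    return np.logspace(np.log10(lr_start), np.log10(lr_end), num_epochs)

# usage mirroring the loop above:
#   curLR = schedule[epoch]
#   for param_group in optimizer.param_groups:
#       param_group['lr'] = curLR
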
Example #3
def main():
    # [*] args, loggers and tensorboard
    args = parse_args()
    reset_config(config, args)

    logger, _, tb_log_dir = create_logger(config, 'SIAMFC', 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
    }

    # [*] gpus parallel and model prepare
    # prepare
    model = models.__dict__[config.SIAMFC.TRAIN.MODEL]()  # build model
    model = load_pretrain(model, config.SIAMFC.TRAIN.PRETRAIN)  # load pretrain
    trainable_params = check_trainable(model,
                                       logger)  # print trainable params info
    optimizer = get_optimizer(config, trainable_params)  # optimizer
    lr_scheduler = lr_decay(config, optimizer)  # learning rate decay scheduler

    if config.SIAMFC.TRAIN.RESUME and config.SIAMFC.TRAIN.START_EPOCH != 0:  # resume
        model.features.unfix((config.SIAMFC.TRAIN.START_EPOCH - 1) /
                             config.SIAMFC.TRAIN.END_EPOCH)
        model, optimizer, args.start_epoch, arch = restore_from(
            model, optimizer, config.SIAMFC.TRAIN.RESUME)

    # parallel
    gpus = [int(i) for i in config.GPUS.split(',')]
    gpu_num = len(gpus)
    logger.info('GPU NUM: {:2d}'.format(len(gpus)))
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
    logger.info('model prepare done')

    # [*] train

    for epoch in range(config.SIAMFC.TRAIN.START_EPOCH,
                       config.SIAMFC.TRAIN.END_EPOCH):
        # build dataloader, benefit to tracking
        train_set = SiamFCDataset(config)
        train_loader = DataLoader(train_set,
                                  batch_size=config.SIAMFC.TRAIN.BATCH *
                                  gpu_num,
                                  num_workers=config.WORKERS,
                                  pin_memory=True,
                                  sampler=None)

        if config.SIAMFC.TRAIN.LR_POLICY == 'log':
            curLR = lr_scheduler[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = curLR
        else:
            lr_scheduler.step()
            curLR = optimizer.param_groups[0]['lr']  # read the LR set by the scheduler; curLR was undefined here before

        model, writer_dict = siamfc_train(train_loader, model, optimizer,
                                          epoch + 1, curLR, config,
                                          writer_dict, logger)

        # save model
        save_model(model,
                   epoch,
                   optimizer,
                   config.SIAMFC.TRAIN.MODEL,
                   config,
                   isbest=False)

    writer_dict['writer'].close()
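
Both the SiamRPN and SiamFC examples call model.features.unfix(...) with the fraction of training already completed, i.e. the backbone is unfrozen progressively rather than all at once. The sketch below shows one way such a helper could behave; it is an assumption for illustration, not the repository's actual implementation.

def unfix(backbone_layers, ratio):
    # hypothetical helper: unfreeze a growing share of backbone layers as
    # training progresses (ratio = completed_epochs / total_epochs),
    # unfreezing the deeper layers first
    n_trainable = int(round(len(backbone_layers) * ratio))
    for i, layer in enumerate(backbone_layers):
        trainable = i >= len(backbone_layers) - n_trainable
        for p in layer.parameters():
            p.requires_grad = trainable
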