def main():
    # [*] args, loggers and tensorboard
    args = parse_args()
    reset_config(config, args)

    logger, _, tb_log_dir = create_logger(config, 'DAG', 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
    }

    # [*] gpus parallel and model prepare
    # prepare pretrained model -- auto-download from Google Drive into ./pretrain
    if not os.path.exists('./pretrain'):
        os.makedirs('./pretrain')

    if config.DAG.TRAIN.ALIGN:
        print('====> train object-aware version <====')
        model = models.__dict__[config.DAG.TRAIN.MODEL](align=True).cuda()   # build model
    else:
        print('====> Default: train without object-aware, also prepare for DAGPlus <====')
        model = models.__dict__[config.DAG.TRAIN.MODEL](align=False).cuda()  # build model

    print(model)

    model = load_pretrain(model, './pretrain/{0}'.format(config.DAG.TRAIN.PRETRAIN))  # load pretrain

    # get optimizer
    if config.DAG.TRAIN.START_EPOCH != config.DAG.TRAIN.UNFIX_EPOCH:  # UNFIX_EPOCH, e.g. 10
        optimizer, lr_scheduler = build_opt_lr(config, model, config.DAG.TRAIN.START_EPOCH)
    else:
        optimizer, lr_scheduler = build_opt_lr(config, model, 0)

    # check trainable again
    print('==========double check trainable==========')
    trainable_params = check_trainable(model, logger)

    if config.DAG.TRAIN.RESUME and config.DAG.TRAIN.START_EPOCH != 0:  # resume
        model, optimizer, args.start_epoch, arch = restore_from(model, optimizer, config.DAG.TRAIN.RESUME)

    # parallel
    gpus = [int(i) for i in config.GPUS.split(',')]
    gpu_num = len(gpus)
    logger.info('GPU NUM: {:2d}'.format(len(gpus)))

    device = torch.device('cuda:{}'.format(gpus[0]) if torch.cuda.is_available() else 'cpu')
    model = torch.nn.DataParallel(model, device_ids=gpus).to(device)

    logger.info(lr_scheduler)
    logger.info('model prepare done')

    # [*] train
    for epoch in range(config.DAG.TRAIN.START_EPOCH, config.DAG.TRAIN.END_EPOCH):
        # build dataloader, benefit to tracking
        train_set = DAGDataset(config)
        train_loader = DataLoader(train_set,
                                  batch_size=config.DAG.TRAIN.BATCH * gpu_num,
                                  num_workers=config.WORKERS,
                                  pin_memory=True,
                                  sampler=None,
                                  drop_last=True)

        # check if it's time to train backbone
        if epoch == config.DAG.TRAIN.UNFIX_EPOCH:
            logger.info('training backbone')
            optimizer, lr_scheduler = build_opt_lr(config, model.module, epoch)
            print('==========double check trainable==========')
            check_trainable(model, logger)  # print trainable params info

        lr_scheduler.step(epoch)
        curLR = lr_scheduler.get_cur_lr()

        model, writer_dict = DAG_train(train_loader, model, optimizer, epoch + 1,
                                       curLR, config, writer_dict, logger, device=device)

        # save model
        save_model(model, epoch, optimizer, config.DAG.TRAIN.MODEL, config, isbest=False)

    writer_dict['writer'].close()
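# --- Illustration (not part of the original script) --------------------------
# A minimal sketch of the config fields the DAG main() above consumes, written
# as an EasyDict purely for illustration. The real `config` object comes from
# the repo's config module; only the key names below appear in the code above,
# all values are placeholders.
from easydict import EasyDict as edict

example_cfg = edict()
example_cfg.GPUS = '0,1'                      # parsed into device ids for DataParallel
example_cfg.WORKERS = 8                       # DataLoader worker processes
example_cfg.DAG = edict(TRAIN=edict(
    MODEL='DAG',                              # key looked up in models.__dict__
    PRETRAIN='pretrain.model',                # file expected under ./pretrain/ (placeholder name)
    ALIGN=False,                              # object-aware branch on/off
    START_EPOCH=0,
    END_EPOCH=50,                             # placeholder
    UNFIX_EPOCH=10,                           # epoch at which the backbone starts training
    RESUME=False,                             # False, or a checkpoint path
    BATCH=32,                                 # per-GPU batch size (placeholder)
))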
def main():
    # [*] args, loggers and tensorboard
    args = parse_args()
    reset_config(config, args)

    logger, _, tb_log_dir = create_logger(config, 'SIAMRPN', 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    writer_dict = {
        'writer': SummaryWriter(logdir=tb_log_dir),
        'train_global_steps': 0,
    }

    # [*] gpus parallel and model prepare
    # prepare pretrained model -- auto-download from Google Drive into ./pretrain
    if not os.path.exists('./pretrain'):
        os.makedirs('./pretrain')

    try:
        DRIVEID = pretrain_zoo()
        if not os.path.exists('./pretrain/{}'.format(config.SIAMRPN.TRAIN.PRETRAIN)):
            os.system(
                'wget --no-check-certificate \'https://drive.google.com/uc?export=download&id={0}\' -O ./pretrain/{1}'
                .format(DRIVEID[config.SIAMRPN.TRAIN.MODEL], config.SIAMRPN.TRAIN.PRETRAIN))
    except Exception:
        print('auto-download of the pretrained model failed, please download it manually and put it in the pretrain directory')

    # define model
    anchor_nums = len(config.SIAMRPN.TRAIN.ANCHORS_RATIOS) * len(config.SIAMRPN.TRAIN.ANCHORS_SCALES)
    model = models.__dict__[config.SIAMRPN.TRAIN.MODEL](anchors_nums=anchor_nums,
                                                        cls_type=config.SIAMRPN.TRAIN.CLS_TYPE)  # build model
    print(model)

    model = load_pretrain(model, './pretrain/{0}'.format(config.SIAMRPN.TRAIN.PRETRAIN))  # load pretrain

    trainable_params = check_trainable(model, logger)  # print trainable params info
    optimizer = get_optimizer(config, trainable_params)  # optimizer
    lr_scheduler = lr_decay(config, optimizer)  # learning rate decay scheduler

    if config.SIAMRPN.TRAIN.RESUME:  # resume
        logger.info('resume from {}'.format(config.SIAMRPN.TRAIN.RESUME))
        assert os.path.isfile(config.SIAMRPN.TRAIN.RESUME), \
            '{} is not a valid file.'.format(config.SIAMRPN.TRAIN.RESUME)
        model, optimizer, config.SIAMRPN.TRAIN.START_EPOCH = \
            restore_from(model, optimizer, config.SIAMRPN.TRAIN.RESUME)
        model.features.unfix((config.SIAMRPN.TRAIN.START_EPOCH - 1) / config.SIAMRPN.TRAIN.END_EPOCH)

    # parallel
    gpus = [int(i) for i in config.GPUS.split(',')]
    gpu_num = len(gpus)
    logger.info('GPU NUM: {:2d}'.format(len(gpus)))

    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
    logger.info('model prepare done')

    # [*] train
    for epoch in range(config.SIAMRPN.TRAIN.START_EPOCH, config.SIAMRPN.TRAIN.END_EPOCH):
        # build dataloader, benefit to tracking
        train_set = SiamRPNDataset(config)
        model.module.anchors = train_set.anchorsPairs
        model.module.cfg = config
        train_loader = DataLoader(train_set,
                                  batch_size=config.SIAMRPN.TRAIN.BATCH * gpu_num,
                                  num_workers=config.WORKERS,
                                  pin_memory=True,
                                  sampler=None)

        if config.SIAMRPN.TRAIN.LR_POLICY == 'log':
            curLR = lr_scheduler[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = curLR
        else:
            lr_scheduler.step()
            curLR = get_lr(optimizer)

        model, writer_dict = siamrpn_train(train_loader, model, optimizer, epoch + 1,
                                           curLR, config, writer_dict, logger,
                                           cls_type=config.SIAMRPN.TRAIN.CLS_TYPE)

        # save model
        save_model(model, epoch, optimizer, config.SIAMRPN.TRAIN.MODEL, config, isbest=False)

    writer_dict['writer'].close()
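# --- Illustration (not part of the original script) --------------------------
# Worked example of the anchor-count computation in the SiamRPN main() above.
# The ratios and scales below are illustrative values typical for SiamRPN-style
# trackers; the real ones come from config.SIAMRPN.TRAIN.ANCHORS_RATIOS and
# config.SIAMRPN.TRAIN.ANCHORS_SCALES.
example_ratios = [0.33, 0.5, 1.0, 2.0, 3.0]
example_scales = [8]

example_anchor_nums = len(example_ratios) * len(example_scales)
print(example_anchor_nums)  # 5 -> the RPN head predicts 5 anchors per spatial position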
def main():
    # [*] args, loggers and tensorboard
    args = parse_args()
    reset_config(config, args)

    logger, _, tb_log_dir = create_logger(config, 'SIAMFC', 'train')
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
    }

    # [*] gpus parallel and model prepare
    # prepare
    model = models.__dict__[config.SIAMFC.TRAIN.MODEL]()  # build model
    model = load_pretrain(model, config.SIAMFC.TRAIN.PRETRAIN)  # load pretrain
    trainable_params = check_trainable(model, logger)  # print trainable params info
    optimizer = get_optimizer(config, trainable_params)  # optimizer
    lr_scheduler = lr_decay(config, optimizer)  # learning rate decay scheduler

    if config.SIAMFC.TRAIN.RESUME and config.SIAMFC.TRAIN.START_EPOCH != 0:  # resume
        model.features.unfix((config.SIAMFC.TRAIN.START_EPOCH - 1) / config.SIAMFC.TRAIN.END_EPOCH)
        model, optimizer, args.start_epoch, arch = restore_from(model, optimizer, config.SIAMFC.TRAIN.RESUME)

    # parallel
    gpus = [int(i) for i in config.GPUS.split(',')]
    gpu_num = len(gpus)
    logger.info('GPU NUM: {:2d}'.format(len(gpus)))

    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
    logger.info('model prepare done')

    # [*] train
    for epoch in range(config.SIAMFC.TRAIN.START_EPOCH, config.SIAMFC.TRAIN.END_EPOCH):
        # build dataloader, benefit to tracking
        train_set = SiamFCDataset(config)
        train_loader = DataLoader(train_set,
                                  batch_size=config.SIAMFC.TRAIN.BATCH * gpu_num,
                                  num_workers=config.WORKERS,
                                  pin_memory=True,
                                  sampler=None)

        if config.SIAMFC.TRAIN.LR_POLICY == 'log':
            curLR = lr_scheduler[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = curLR
        else:
            lr_scheduler.step()
            curLR = get_lr(optimizer)  # fix: mirrors the SiamRPN script; curLR was otherwise undefined on this branch

        model, writer_dict = siamfc_train(train_loader, model, optimizer, epoch + 1,
                                          curLR, config, writer_dict, logger)

        # save model
        save_model(model, epoch, optimizer, config.SIAMFC.TRAIN.MODEL, config, isbest=False)

    writer_dict['writer'].close()
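# --- Illustration (not part of the original script) --------------------------
# When LR_POLICY == 'log', the scripts above index lr_scheduler like a
# per-epoch array (lr_scheduler[epoch]). This is a minimal sketch of such a
# schedule, assuming a log-spaced decay from a start LR to an end LR; the
# repo's lr_decay() may construct it differently.
import numpy as np

def example_log_lr_schedule(start_lr=1e-2, end_lr=1e-5, epochs=50):
    """Return one learning rate per epoch, log-spaced between start_lr and end_lr."""
    return np.logspace(np.log10(start_lr), np.log10(end_lr), num=epochs)

schedule = example_log_lr_schedule()
print(schedule[0], schedule[-1])  # ~1e-2 for the first epoch, ~1e-5 for the last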