def _train_impl(args, model_specs, logger):
    """Train the segmentation model jointly on source and target-domain data.

    Sizes an epoch from the source split list, builds the learning-rate
    schedule (resume-aware), optionally warm-starts from ``args.weights``,
    and fits the module with SGD, checkpointing every epoch under
    ``<args.output>/<args.idx_round>/<args.model>_ep``.

    Args:
        args: parsed command-line namespace (paths, epochs, gpus, batch
            size, self-training round index, ...).
        model_specs: dict of model hyper-parameters. NOTE: its
            ``lr_params['args']['step']`` list is rewritten in place below
            when resuming a 'step' schedule.
        logger: unused here; kept for interface compatibility with callers.
    """
    if len(args.output) > 0:
        _make_dirs(args.output)

    # dataset / model specs as attribute namespaces
    dataset_specs_tgt = get_dataset_specs_tgt(args, model_specs)
    scale, mean_, _ = _get_scalemeanstd()
    if scale > 0:
        # mean is stored in raw-pixel units; bring it into the network's
        # scaled input range
        mean_ /= scale
    margs = argparse.Namespace(**model_specs)
    dargs = argparse.Namespace(**dataset_specs_tgt)

    # Count source images = number of lines in the split list file.
    split_filename = 'issegm/data_list/{}/{}.lst'.format(margs.dataset, args.split)
    num_source = 0
    with open(split_filename) as f:
        for item in f.readlines():
            num_source = num_source + 1
    batches_per_epoch = num_source // args.batch_images

    # ---- epoch bookkeeping -----------------------------------------------
    assert args.to_epoch is not None
    if args.stop_epoch is not None:
        assert args.stop_epoch > args.from_epoch and args.stop_epoch <= args.to_epoch
    else:
        args.stop_epoch = args.to_epoch

    from_iter = args.from_epoch * batches_per_epoch
    to_iter = args.to_epoch * batches_per_epoch

    # ---- learning-rate schedule ------------------------------------------
    lr_params = model_specs['lr_params']
    base_lr = lr_params['base']
    if lr_params['type'] == 'fixed':
        scheduler = FixedScheduler()
    elif lr_params['type'] == 'step':
        # Rebase step milestones to the resume iteration; milestones already
        # passed are folded into the base learning rate instead.
        left_step = []
        for step in lr_params['args']['step']:
            if from_iter > step:
                base_lr *= lr_params['args']['factor']
                continue
            left_step.append(step - from_iter)
        model_specs['lr_params']['step'] = left_step
        scheduler = mx.lr_scheduler.MultiFactorScheduler(**lr_params['args'])
    elif lr_params['type'] == 'linear':
        scheduler = LinearScheduler(updates=to_iter + 1, frequency=50,
                                    stop_lr=min(base_lr / 100., 1e-6),
                                    offset=from_iter)
    elif lr_params['type'] == 'poly':
        scheduler = PolyScheduler(updates=to_iter + 1, frequency=50,
                                  stop_lr=min(base_lr / 100., 1e-8),
                                  power=0.9,
                                  offset=from_iter)
    else:
        # Previously an unknown type fell through and crashed later with a
        # NameError on `scheduler`; fail fast with a clear message instead.
        raise ValueError("unknown lr_params['type']: {}".format(lr_params['type']))

    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2)
    optimizer_params = {
        'learning_rate': base_lr,
        'momentum': 0.9,
        'wd': args.weight_decay,
        'lr_scheduler': scheduler,
        # average gradients over the devices listed in --gpus
        'rescale_grad': 1.0 / len(args.gpus.split(',')),
    }

    # Number of source samples selected for this self-training round.
    data_src_port = args.init_src_port
    data_src_num = int(num_source * data_src_port)

    mod = _get_module(args, margs, dargs)
    addr_weights = args.weights  # first weights should be xxxx_ep-0000.params!
    addr_output = args.output

    # Warm-start parameters, if a checkpoint was supplied.
    net_args = None
    net_auxs = None
    if addr_weights is not None:
        net_args, net_auxs = mxutil.load_params_from_file(addr_weights)

    # ---- training ---------------------------------------------------------
    # checkpoint prefix: <output>/<round>/<model>_ep
    to_model = osp.join(addr_output, str(args.idx_round), '{}_ep'.format(args.model))
    dataiter = FileIter(
        dataset=margs.dataset,
        split=args.split,
        data_root=args.data_root,
        num_sel_source=data_src_num,
        num_source=num_source,
        seed_int=args.seed_int,
        dataset_tgt=args.dataset_tgt,
        split_tgt=args.split_tgt,
        data_root_tgt=args.data_root_tgt,
        sampler='random',
        batch_images=args.batch_images,
        meta=dataset_specs_tgt,
        rgb_mean=mean_,
        feat_stride=margs.feat_stride,
        label_stride=margs.feat_stride,
        origin_size=args.origin_size,
        origin_size_tgt=args.origin_size_tgt,
        crop_size=args.crop_size,
        scale_rate_range=[float(_) for _ in args.scale_rate_range.split(',')],
        transformer=None,
        transformer_image=ts.Compose(_get_transformer_image()),
        prefetch_threads=args.prefetch_threads,
        prefetcher_type=args.prefetcher,
    )
    dataiter.reset()
    mod.fit(
        dataiter,
        eval_metric=_get_metric(),
        batch_end_callback=mx.callback.log_train_metric(10, auto_reset=False),
        epoch_end_callback=mx.callback.do_checkpoint(to_model),
        kvstore=args.kvstore,
        optimizer='sgd',
        optimizer_params=optimizer_params,
        initializer=initializer,
        arg_params=net_args,
        aux_params=net_auxs,
        # allow uninitialized weights only on a fresh run
        allow_missing=args.from_epoch == 0,
        begin_epoch=args.from_epoch,
        num_epoch=args.stop_epoch,
    )
def _train_impl(args, model_specs, logger):
    """Train the segmentation model on a single (source) dataset.

    Builds the data iterator, the resume-aware learning-rate schedule and
    the optimizer settings, optionally warm-starts from ``args.weights``,
    then fits the module with SGD, checkpointing every epoch under
    ``<args.output>/<args.model>_ep``.

    NOTE(review): a second ``_train_impl`` with the same name appears
    earlier in this source; if both live in one module this definition
    shadows it -- confirm they belong to separate files.

    Args:
        args: parsed command-line namespace (paths, epochs, gpus, ...);
            ``args.stop_epoch`` may be assigned below when it is None.
        model_specs: dict of model hyper-parameters. NOTE: its
            ``lr_params['args']['step']`` list is rewritten in place below
            when resuming a 'step' schedule.
        logger: unused here; presumably kept for interface compatibility.
    """
    if len(args.output) > 0:
        _make_dirs(args.output)
    # dataiter
    dataset_specs = get_dataset_specs(args, model_specs)
    scale, mean_, _ = _get_scalemeanstd()
    if scale > 0:
        # mean is in raw-pixel units; bring it into the scaled input range
        mean_ /= scale
    margs = argparse.Namespace(**model_specs)
    dargs = argparse.Namespace(**dataset_specs)
    dataiter = FileIter(
        dataset=margs.dataset,
        split=args.split,
        data_root=args.data_root,
        sampler='random',
        batch_images=args.batch_images,
        meta=dataset_specs,
        rgb_mean=mean_,
        feat_stride=margs.feat_stride,
        label_stride=margs.feat_stride,
        origin_size=args.origin_size,
        crop_size=args.crop_size,
        scale_rate_range=[float(_) for _ in args.scale_rate_range.split(',')],
        transformer=None,
        transformer_image=ts.Compose(_get_transformer_image()),
        prefetch_threads=args.prefetch_threads,
        prefetcher_type=args.prefetcher,
    )
    dataiter.reset()
    # optimizer
    assert args.to_epoch is not None
    if args.stop_epoch is not None:
        assert args.stop_epoch > args.from_epoch and args.stop_epoch <= args.to_epoch
    else:
        args.stop_epoch = args.to_epoch
    # Convert epoch bounds to iteration counts for the LR schedulers.
    from_iter = args.from_epoch * dataiter.batches_per_epoch
    to_iter = args.to_epoch * dataiter.batches_per_epoch
    lr_params = model_specs['lr_params']
    base_lr = lr_params['base']
    if lr_params['type'] == 'fixed':
        scheduler = FixedScheduler()
    elif lr_params['type'] == 'step':
        # Rebase step milestones to the resume iteration; milestones already
        # passed are folded into the base learning rate instead.
        left_step = []
        for step in lr_params['args']['step']:
            if from_iter > step:
                base_lr *= lr_params['args']['factor']
                continue
            left_step.append(step - from_iter)
        model_specs['lr_params']['step'] = left_step
        scheduler = mx.lr_scheduler.MultiFactorScheduler(**lr_params['args'])
    elif lr_params['type'] == 'linear':
        scheduler = LinearScheduler(updates=to_iter + 1, frequency=50,
                                    stop_lr=min(base_lr / 100., 1e-6),
                                    offset=from_iter)
    # NOTE(review): unlike the target-domain variant of this function there
    # is no 'poly' branch, and an unrecognized type leaves `scheduler`
    # unbound (NameError below) -- confirm configs only use
    # fixed/step/linear here.
    optimizer_params = {
        'learning_rate': base_lr,
        'momentum': 0.9,
        'wd': args.weight_decay,
        'lr_scheduler': scheduler,
        # average gradients over the devices listed in --gpus
        'rescale_grad': 1.0 / len(args.gpus.split(',')),
    }
    # initializer / warm-start parameters, if a checkpoint was supplied
    net_args = None
    net_auxs = None
    if args.weights is not None:
        net_args, net_auxs = mxutil.load_params_from_file(args.weights)
    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2)
    # checkpoint prefix: <output>/<model>_ep
    to_model = osp.join(args.output, '{}_ep'.format(args.model))
    mod = _get_module(args, margs, dargs)
    mod.fit(
        dataiter,
        eval_metric=_get_metric(),
        batch_end_callback=mx.callback.Speedometer(dataiter.batch_size, 1),
        epoch_end_callback=mx.callback.do_checkpoint(to_model),
        kvstore=args.kvstore,
        optimizer='sgd',
        optimizer_params=optimizer_params,
        initializer=initializer,
        arg_params=net_args,
        aux_params=net_auxs,
        # allow uninitialized weights only on a fresh run
        allow_missing=args.from_epoch == 0,
        begin_epoch=args.from_epoch,
        num_epoch=args.stop_epoch,
    )