def _non_dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` in a single process using ``MMDataParallel``.

    Builds one non-distributed data loader, wraps the model for
    ``cfg.gpus`` devices, creates a ``Runner``, registers the training
    hooks (plus a ``CfgSaverHook``), optionally registers an evaluation
    hook, restores a checkpoint if configured, and starts the run.

    Args:
        model: Model to train.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object; reads ``data``, ``gpus``, ``optimizer``,
            ``work_dir``, ``log_level``, the ``*_config`` hook settings,
            ``resume_from``, ``load_from``, ``workflow`` and
            ``total_epochs``.
        validate: If true, register an evaluation hook on ``cfg.data.val``.
    """
    # ``runner.run`` receives a list of loaders; there is exactly one here.
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        cfg.gpus,
        dist=False)
    data_loaders = [train_loader]

    # Spread the model over the configured GPUs.
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(CfgSaverHook(cfg), priority='VERY_LOW')

    if validate:
        # TODO: should implement dist, nondist version evaluation
        if isinstance(model.module, RPN):
            eval_hook = CocoDistEvalRecallHook(cfg.data.val)
        elif cfg.data.val.type == 'CocoDataset':
            eval_hook = CocoDistEvalmAPHook(cfg.data.val)
        else:
            eval_hook = NonDistEvalmAPHook(cfg.data.val)
        runner.register_hook(eval_hook)

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` in a single process using ``MMDataParallel``.

    The evaluation hook is picked from the model type and
    ``cfg.data.val.type``; the KITTI variants additionally receive
    ``cfg.eval_interval``.

    Args:
        model: Model to train.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``.
    """
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        cfg.gpus,
        dist=False)
    data_loaders = [train_loader]

    # Spread the model over the configured GPUs.
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)

    if validate:
        if isinstance(model.module, RPN):
            eval_hook = CocoDistEvalRecallHook(cfg.data.val)
        elif cfg.data.val.type == 'CocoDataset':
            eval_hook = CocoDistEvalmAPHook(cfg.data.val)
        elif cfg.data.val.type in ['KittiLiDAR', 'KittiRGB']:
            eval_hook = KittiEvalmAPHook(
                cfg.data.val, interval=cfg.eval_interval)
        else:
            eval_hook = DistEvalmAPHook(cfg.data.val)
        runner.register_hook(eval_hook)

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``.
    """
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        dist=True)
    data_loaders = [train_loader]

    model = MMDistributedDataParallel(model.cuda())

    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)

    # The optimizer step is driven by a distributed optimizer hook.
    dist_optimizer_hook = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, dist_optimizer_hook,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            eval_hook = CocoDistEvalRecallHook(cfg.data.val)
        elif cfg.data.val.type == 'CocoDataset':
            eval_hook = CocoDistEvalmAPHook(cfg.data.val)
        else:
            eval_hook = DistEvalmAPHook(cfg.data.val)
        runner.register_hook(eval_hook)

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    One data loader is built per dataset. Each loader also receives
    ``selsa_imgs``, taken from ``cfg.data.selsa_imgs`` when present and
    falling back to ``cfg.data.imgs_per_gpu`` otherwise.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: A dataset or a list/tuple of datasets.
        cfg: Config object. Note that the ``'mod'`` key is popped from it.
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
    """
    # Only a *missing* option should trigger the fallback; any other error
    # raised while reading the config must surface instead of being hidden.
    try:
        selsa_imgs = cfg.data.selsa_imgs
    except (AttributeError, KeyError):
        selsa_imgs = cfg.data.imgs_per_gpu

    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            selsa_imgs=selsa_imgs,
            dist=True) for ds in dataset
    ]

    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # ``mod`` is removed from cfg and forwarded to the optimizer builder.
    mod = cfg.pop('mod', False)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer, mod)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None):
    """Train ``model`` with ``MMDistributedDataParallel``.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
        logger: Logger forwarded to the ``Runner``.
        timestamp: Value stored on ``runner.timestamp``.
    """
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]
    data_loaders = []
    for ds in dataset:
        data_loaders.append(
            build_dataloader(
                ds,
                cfg.data.imgs_per_gpu,
                cfg.data.workers_per_gpu,
                dist=True))

    model = MMDistributedDataParallel(model.cuda())

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # Workaround so that the .log and .log.json filenames match.
    runner.timestamp = timestamp

    # Mixed precision uses its own optimizer hook when ``fp16`` is set.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is None:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            # COCO-style and Acoustic datasets share the COCO mAP hook.
            if issubclass(dataset_type, datasets.CocoDataset) or \
                    issubclass(dataset_type, datasets.Acoustic):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    The validation dataset class is looked up by name on the ``datasets``
    module to decide between the COCO and the generic mAP hook.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
    """
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        dist=True)
    data_loaders = [train_loader]

    model = MMDistributedDataParallel(model.cuda())

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # Mixed precision uses its own optimizer hook when ``fp16`` is set.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is None:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    For non-RPN models a ``KaggleEvalHook`` is registered for the
    validation config, or one hook per entry when ``cfg.data.val`` is a
    list. Any other type of ``cfg.data.val`` registers nothing.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset.
        cfg: Config object; ``cfg.evaluation`` must exist when
            ``validate`` is true.
        validate: If true, register evaluation hook(s).
    """
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]
    data_loaders = []
    for ds in dataset:
        data_loaders.append(
            build_dataloader(
                ds,
                cfg.data.imgs_per_gpu,
                cfg.data.workers_per_gpu,
                dist=True))

    model = MMDistributedDataParallel(model.cuda())

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # Mixed precision uses its own optimizer hook when ``fp16`` is set.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is None:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.evaluation
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        elif isinstance(val_dataset_cfg, dict):
            runner.register_hook(KaggleEvalHook(val_dataset_cfg, **eval_cfg))
        elif isinstance(val_dataset_cfg, list):
            # One evaluation hook per validation dataset config.
            for vdc in val_dataset_cfg:
                runner.register_hook(KaggleEvalHook(vdc, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False, **kwargs):
    """Train ``model`` with ``MMDistributedDataParallel`` and an
    ``EpochBasedRunner``.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object; ``cfg.train_cfg.repeat_samples`` is forwarded
            to the data loader builder.
        validate: If true, register an evaluation hook on ``cfg.data.val``.
        **kwargs: Extra keyword arguments forwarded to
            ``build_dataloader``.
    """
    logger = get_root_logger(cfg.log_level)

    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        dist=True,
        repeat_samples=cfg.train_cfg.repeat_samples,
        **kwargs)
    data_loaders = [train_loader]

    # Pin the wrapped model to the current CUDA device of this process.
    model = MMDistributedDataParallel(
        model.cuda(), device_ids=[torch.cuda.current_device()])

    optimizer = build_optimizer(model, cfg.optimizer)
    # No batch_processor is passed to the epoch-based runner.
    runner = EpochBasedRunner(
        model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger)

    dist_optimizer_hook = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, dist_optimizer_hook,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train_runner(model,
                       trainDataset,
                       valDataset,
                       cfg,
                       validate=False) -> Runner:
    """Build, but do not start, a ``Runner`` for distributed training.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        trainDataset: Not used by this function.
        valDataset: Validation dataset config passed to the evaluation
            hook; its ``type`` attribute names a class in
            ``mmdetDatasets``.
        cfg: Config object providing optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook for ``valDataset``
            using the optional ``cfg.evaluation`` keyword arguments.

    Returns:
        The configured runner, with any checkpoint already restored.
    """
    model = MMDistributedDataParallel(model.cuda())

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # Mixed precision uses its own optimizer hook when ``fp16`` is set.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is None:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = valDataset
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = getattr(mmdetDatasets, val_dataset_cfg.type)
            if issubclass(dataset_type, CocoDataset):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    return runner
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    The wrapper is created with ``broadcast_buffers=False`` and with
    ``find_unused_parameters`` read from ``cfg`` (default ``False``).

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``.
    """
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        dist=True)
    data_loaders = [train_loader]

    # Forwarded to the ``find_unused_parameters`` option of
    # torch.nn.parallel.DistributedDataParallel.
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    dist_optimizer_hook = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, dist_optimizer_hook,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with synchronized batch norm and
    ``MMDistributedDataParallel``.

    Batch-norm layers are converted with
    ``torch.nn.SyncBatchNorm.convert_sync_batchnorm`` before the model is
    wrapped.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``.
    """
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        dist=True)
    data_loaders = [train_loader]

    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    sync_bn_layers = (layer for layer in model.modules()
                      if isinstance(layer, torch.nn.SyncBatchNorm))
    for layer in sync_bn_layers:
        # NOTE(review): private torch API; presumably tells each layer it
        # runs with one GPU per process -- confirm against the torch
        # version in use.
        layer._specify_ddp_gpu_num(1)
    model = MMDistributedDataParallel(model.cuda())

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    dist_optimizer_hook = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, dist_optimizer_hook,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    The data loader receives ``pad_size`` from ``cfg.data.pad_size`` when
    that option exists and ``None`` otherwise. The model is placed on GPU
    ``RANK % device_count``, where ``RANK`` comes from the environment.

    Args:
        model: Model to train.
        dataset: Training dataset handed to ``build_dataloader``.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``.

    Raises:
        KeyError: If the ``RANK`` environment variable is not set.
    """
    # Only a *missing* option selects the default. A bare ``except`` would
    # also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    try:
        pad_size = cfg.data.pad_size
    except (AttributeError, KeyError):
        pad_size = None
    else:
        print("using padding size")

    data_loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            pad_size=pad_size,
            dist=True)
    ]

    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()

    # put model on gpus
    model = MMDistributedDataParallel(model.cuda(rank % num_gpus))
    torch.cuda.empty_cache()

    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)

    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        elif cfg.data.val.type in ('CocoDataset', 'CocoZipDataset'):
            runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
        else:
            runner.register_hook(DistEvalmAPHook(cfg.data.val))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_train(model,
                    dataset,
                    cfg,
                    validate=False,
                    logger=None,
                    timestamp=None,
                    runner_attr_dict=None):
    """Train ``model`` without distributed data loading.

    Built-in validation is not available here; requesting it raises
    immediately.

    Args:
        model: Model to train. It is passed to the optimizer builder and
            the runner as given.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings. ``cfg.search_config.search_optimizer`` must be
            unset.
        validate: Must be false.
        logger: Logger forwarded to the ``Runner``.
        timestamp: Value stored on ``runner.timestamp``.
        runner_attr_dict: Optional dict of extra runner attributes. A
            dict passed in is updated in place; when omitted a fresh dict
            is created for this call.

    Raises:
        NotImplementedError: If ``validate`` is true.
        AssertionError: If a search optimizer is configured.
    """
    if validate:
        raise NotImplementedError('Built-in validation is not implemented '
                                  'yet in not-distributed training. Use '
                                  'distributed training or test.py and '
                                  '*eval.py scripts instead.')

    # A ``dict()`` default would be shared by every call and accumulate
    # state across trainings, so the default dict is created per call.
    if runner_attr_dict is None:
        runner_attr_dict = {}

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            cfg.gpus,
            dist=False) for ds in dataset
    ]

    # build runner
    runner_attr_dict.update({
        'imgs_per_gpu': cfg.data.imgs_per_gpu,
        'initial_lr': cfg.optimizer['lr']
    })
    # ``dataset`` is a list/tuple at this point and never has ``CLASSES``
    # itself, so the attribute is read from the first dataset instead.
    first_dataset = dataset[0] if dataset else None
    if hasattr(first_dataset, 'CLASSES'):
        runner_attr_dict.update({'classes': first_dataset.CLASSES})

    optimizer = build_optimizer(model, cfg.optimizer)
    search_optimizer = getattr(
        getattr(cfg, 'search_config', {}), 'search_optimizer', None)
    assert search_optimizer is None, "Not support"
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        search_optimizer,
        cfg.work_dir,
        logger=logger,
        runner_attr_dict=runner_attr_dict)
    # Workaround so that the .log and .log.json filenames match.
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=False)
    else:
        optimizer_config = cfg.optimizer_config
    runner.register_training_hooks(cfg.lr_config, optimizer_config)

    # No eval hooks are registered: ``validate`` is always false here
    # because of the guard at the top of the function.
    register_hooks(runner, cfg)

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_train_runner(model,
                           trainDataset,
                           valDataset,
                           cfg,
                           validate=False) -> Runner:
    """Build, but do not start, a ``Runner`` for ``MMDataParallel`` training.

    When ``cfg.load_from`` is set, an ``open-mmlab://`` name is first
    resolved through ``checkpoint_registry``. If ``cfg.resetHeads`` is
    true, a copy of the checkpoint without ``bbox_head``/``mask_head``
    weights is created (once) under ``<torch home>/checkpoints/nohead``
    and loaded instead.

    Args:
        model: Model to train.
        trainDataset: Not used by this function.
        valDataset: Validation dataset config passed to the evaluation
            hook; its ``type`` attribute names a class in
            ``mmdetDatasets``.
        cfg: Config object providing optimizer, hook and checkpoint
            settings.
        validate: If true, register a wrapped evaluation hook for
            ``valDataset``.

    Returns:
        The configured runner, with any checkpoint already restored.
    """
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=False)
    else:
        optimizer_config = cfg.optimizer_config
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)

    # Callbacks handed to HookWrapper together with each evaluation hook.
    before = toSingleGPUModeBefore
    after = toSingleGPUModeAfter

    # register eval hooks
    if validate:
        val_dataset_cfg = valDataset
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            eval_hook = CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg)
        else:
            dataset_type = getattr(mmdetDatasets, val_dataset_cfg.type)
            if issubclass(dataset_type, mmdetDatasets.CocoDataset):
                eval_hook = CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg)
            else:
                eval_hook = DistEvalmAPHook(val_dataset_cfg, **eval_cfg)
        runner.register_hook(HookWrapper(eval_hook, before, after))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        # ``cfg.load_from`` is truthy here, so it cannot be None.
        weightsPath = cfg.load_from
        if weightsPath.startswith("open-mmlab://"):
            cfgName = weightsPath[len("open-mmlab://"):]
            weightsPath = checkpoint_registry.getPath(cfgName)
        if cfg.resetHeads:
            torchHome = torch.hub._get_torch_home()
            # Drop the trailing ".pth" (4 characters) from the file name.
            chpName = os.path.basename(weightsPath)[:-len(".pth")]
            noHeadWeightsPath = os.path.join(
                torchHome, f"checkpoints/nohead/{chpName}_nohead.pth")
            if not os.path.exists(noHeadWeightsPath):
                if isURL(weightsPath):
                    weights = model_zoo.load_url(weightsPath)
                else:
                    weights = torch.load(weightsPath)
                # Keep everything except the detection/mask head weights.
                weights['state_dict'] = {
                    k: v
                    for k, v in weights['state_dict'].items()
                    if not k.startswith(('bbox_head', 'mask_head'))
                }
                # ``makedirs`` also creates a missing ``checkpoints``
                # parent directory and tolerates another process having
                # created the directory in the meantime.
                os.makedirs(
                    os.path.dirname(noHeadWeightsPath), exist_ok=True)
                torch.save(weights, noHeadWeightsPath)
            weightsPath = noHeadWeightsPath
        runner.load_checkpoint(weightsPath)

    return runner
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None,
                runner_attr_dict=None):
    """Train ``model`` with distributed data loading and optional search.

    When ``cfg.search_config`` contains ``search_optimizer`` it is popped
    from that config, passed to the ``Runner``, and the search hooks are
    registered with the remaining ``cfg.search_config`` entries.

    Args:
        model: Model to train. It is passed to the optimizer builder and
            the runner as given; ``model.module`` is read when
            ``validate`` is true.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
        logger: Logger forwarded to the ``Runner``.
        timestamp: Value stored on ``runner.timestamp``.
        runner_attr_dict: Optional dict of extra runner attributes. A
            dict passed in is updated in place; when omitted a fresh dict
            is created for this call.
    """
    # A ``dict()`` default would be shared by every call and accumulate
    # state across trainings, so the default dict is created per call.
    if runner_attr_dict is None:
        runner_attr_dict = {}

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
        for ds in dataset
    ]

    # build runner
    runner_attr_dict.update({
        'imgs_per_gpu': cfg.data.imgs_per_gpu,
        'initial_lr': cfg.optimizer['lr']
    })
    # ``dataset`` is a list/tuple at this point and never has ``CLASSES``
    # itself, so the attribute is read from the first dataset instead.
    first_dataset = dataset[0] if dataset else None
    if hasattr(first_dataset, 'CLASSES'):
        runner_attr_dict.update({'classes': first_dataset.CLASSES})

    optimizer = build_optimizer(model, cfg.optimizer)
    # Removing the entry keeps it out of the SearchHook kwargs below.
    search_optimizer = getattr(cfg, 'search_config',
                               {}).pop('search_optimizer', None)
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        search_optimizer,
        cfg.work_dir,
        logger=logger,
        runner_attr_dict=runner_attr_dict)
    # Workaround so that the .log and .log.json filenames match.
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config)
    runner.register_hook(DistSamplerSeedHook())
    if search_optimizer is not None:
        runner.register_hook(DistSearchOptimizerHook())
        runner.register_hook(SearchHook(**cfg.search_config))

    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    register_hooks(runner, cfg)

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False):
    """Train ``model`` with ``MMDistributedDataParallel``.

    ``cfg.data.use_img_sampling`` and ``cfg.data.use_sample_out`` are set
    to ``False`` on ``cfg.data`` when absent, and both are forwarded to
    every data loader. The runner's logger level is forced to INFO.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset.
        cfg: Config object; ``cfg.data`` may be updated in place.
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
    """
    if not isinstance(dataset, (list, tuple)):
        dataset = [dataset]

    # Fill in the sampling switches that the config did not specify.
    for flag in ('use_img_sampling', 'use_sample_out'):
        if flag not in cfg.data:
            cfg.data.update({flag: False})
    print('--Dist-train--IS:{}--ISout:{}'.format(cfg.data.use_img_sampling,
                                                 cfg.data.use_sample_out))

    data_loaders = []
    for ds in dataset:
        data_loaders.append(
            build_dataloader(
                ds,
                cfg.data.imgs_per_gpu,
                cfg.data.workers_per_gpu,
                dist=True,
                use_img_sampling=cfg.data.use_img_sampling,
                use_sample_out=cfg.data.use_sample_out))

    model = MMDistributedDataParallel(model.cuda())

    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # Added for LVIS by LiYu: always log at INFO level.
    import logging
    runner.logger.setLevel(logging.INFO)

    # Mixed precision uses its own optimizer hook when ``fp16`` is set.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is None:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            hook_cls = CocoDistEvalRecallHook
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                hook_cls = CocoDistEvalmAPHook
            else:
                hook_cls = DistEvalmAPHook
        runner.register_hook(hook_cls(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None):
    """Train ``model`` with ``MMDistributedDataParallel``.

    The model is logged before it is wrapped. ``register_training_hooks``
    additionally receives ``cfg.total_epochs`` and the length of the
    first data loader.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset.
        cfg: Config object providing data, optimizer, hook and checkpoint
            settings.
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
        logger: Logger to use; when ``None`` the root logger from
            ``mmdet.apis.get_root_logger`` is used.
        timestamp: Value stored on ``runner.timestamp``.
    """
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
        for ds in dataset
    ]

    # Honour a logger supplied by the caller; previously the argument was
    # always overwritten, so it had no effect.
    if logger is None:
        from mmdet.apis import get_root_logger
        logger = get_root_logger()
    logger.info(model)

    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # Workaround so that the .log and .log.json filenames match.
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.total_epochs, len(data_loaders[0]))
    runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            # KITTI-style datasets are matched by name before the COCO
            # subclass check.
            if cfg.data.val.type in [
                    'KittiDataset', 'KittiInCocoDataset', 'Kitti3dDataset'
            ]:
                runner.register_hook(
                    KITTIDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            elif issubclass(dataset_type, datasets.CocoDataset):
                logger.info('Using CocoDistEvalmAPHook.')
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None):
    """Train ``model`` in distributed mode, then evaluate on LVIS test data.

    After ``runner.run`` returns, the function rebuilds the test dataset
    from ``cfg.data.test`` and runs ``single_gpu_test`` on it:

    * If ``cfg.data.test.ann_file`` is not the LVIS "freqset" annotation
      file, the configured test set is scored with ``coco_eval``; then
      ``cfg.data.test`` is repointed at the cocofied LVIS val set, which
      is scored with ``LVISEval``.
    * Otherwise only the configured (freqset) test set is scored with
      ``LVISEval``.

    Args:
        model: Model to train; moved to GPU with ``model.cuda()``.
        dataset: A dataset or a list/tuple of datasets; one loader is
            built per dataset. The name is rebound to the test dataset
            after training.
        cfg: Config object. ``cfg.data.test`` is modified in place
            (``test_mode``, and in the first branch ``ann_file`` and
            ``img_prefix``).
        validate: If true, register an evaluation hook on ``cfg.data.val``
            using the optional ``cfg.evaluation`` keyword arguments.
        logger: Logger forwarded to the ``Runner``.
        timestamp: Value stored on ``runner.timestamp``.
    """
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
        for ds in dataset
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # Disabled experiment: freeze every parameter whose name does not
    # match an entry of ``bbox_head.optimize_list``.
    # # if model.module.bbox_head.freeze_solov2_and_train_combonly:
    # if model.module.bbox_head.optimize_list is not None:
    #     for (key, param) in model.named_parameters():
    #         # if 'kernel_convs_convcomb' not in key and 'context_fusion_convs' not in key and 'learned_weight' not in key:
    #         if not any(s in key for s in model.module.bbox_head.optimize_list):
    #             param.requires_grad=False
    #         else:
    #             # print('optimize {}'.format(key))
    #             logger.info('optimize {}'.format(key))

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    # Resuming takes precedence over merely loading weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)

    ## add test after training
    # NOTE(review): nothing below is restricted to a single rank, so
    # presumably every distributed process runs this evaluation -- confirm.
    if cfg.data.test.ann_file != 'data/lvis/lvis_v0.5_val_lvis_freqset.json':  # if val set is lvis freq, only eval on lvis-freq val set
        cfg.data.test.test_mode = True
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

        # NOTE(review): ``model_orig`` is never used afterwards, and the
        # distributed wrapper (not ``model.module``) is what gets wrapped
        # in MMDataParallel -- confirm this is intended.
        model_orig=model.module
        model = MMDataParallel(model, device_ids=[0]).cuda()
        # data_loader.dataset.img_infos = data_loader.dataset.img_infos[:100]
        outputs = single_gpu_test(model, data_loader)

        # 'xxx' is a placeholder name; no results file is written here.
        print('\nwriting results to {}'.format('xxx'))
        # mmcv.dump(outputs, 'xxx')
        eval_types = ['segm']
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            # With eval_types fixed to ['segm'] this branch is never taken.
            if eval_types == ['proposal_fast']:
                result_file = 'xxx'
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json_segm(dataset, outputs, 'xxx', dump=False)
                    coco_eval(result_files, eval_types, dataset.coco)
                else:
                    # Dict outputs: evaluate each named output separately.
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = 'xxx' + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file, dump=False)
                        coco_eval(result_files, eval_types, dataset.coco)

        ##eval on lvis-77######
        # Repoint the test config at the cocofied LVIS val annotations.
        cfg.data.test.ann_file = 'data/lvis/lvis_v0.5_val_cocofied.json'
        cfg.data.test.img_prefix = 'data/lvis/val2017/'
        cfg.data.test.test_mode = True
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

        # The model was already wrapped in MMDataParallel above.
        # model_orig=model.module
        # model = MMDataParallel(model, device_ids=[0]).cuda()
        # data_loader.dataset.img_infos = data_loader.dataset.img_infos[:100]
        outputs = single_gpu_test(model, data_loader)

        print('\nwriting results to {}'.format('xxx'))
        # mmcv.dump(outputs, 'xxx')
        eval_types = ['segm']
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            # With eval_types fixed to ['segm'] this branch is never taken.
            if eval_types == ['proposal_fast']:
                result_file = 'xxx'
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json_segm(dataset, outputs, 'xxx', dump=False)
                    from lvis import LVISEval
                    lvisEval = LVISEval('data/lvis/lvis_v0.5_val_cocofied.json', result_files, 'segm')
                    lvisEval.run()
                    lvisEval.print_results()
                    # fix lvis api eval iou_thr error, should be 0.9 but was 0.8999
                    lvisEval.params.iou_thrs[8] = 0.9
                    for iou in [0.5, 0.6, 0.7, 0.8, 0.9]:
                        print('AP at iou {}: {}'.format(iou, lvisEval._summarize('ap', iou_thr=iou)))
                else:
                    # Dict outputs: evaluate each named output separately.
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = 'xxx' + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file, dump=False)
                        coco_eval(result_files, eval_types, dataset.coco)
    else:
        ##eval on lvis-freq######
        cfg.data.test.test_mode = True
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

        # NOTE(review): unlike the branch above, the model is NOT wrapped
        # in MMDataParallel here -- confirm single_gpu_test accepts the
        # distributed wrapper.
        # model_orig=model.module
        # model = MMDataParallel(model, device_ids=[0]).cuda()
        # NOTE(review): only the first 100 images are evaluated; this looks
        # like a debugging leftover -- confirm before relying on the scores.
        data_loader.dataset.img_infos = data_loader.dataset.img_infos[:100]
        outputs = single_gpu_test(model, data_loader)

        print('\nwriting results to {}'.format('xxx'))
        # mmcv.dump(outputs, 'xxx')
        eval_types = ['segm']
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            # With eval_types fixed to ['segm'] this branch is never taken.
            if eval_types == ['proposal_fast']:
                result_file = 'xxx'
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json_segm(dataset, outputs, 'xxx', dump=False)
                    from lvis import LVISEval
                    lvisEval = LVISEval(cfg.data.test.ann_file, result_files, 'segm')
                    lvisEval.run()
                    lvisEval.print_results()
                    # fix lvis api eval iou_thr error, should be 0.9 but was 0.8999
                    lvisEval.params.iou_thrs[8] = 0.9
                    for iou in [0.5, 0.6, 0.7, 0.8, 0.9]:
                        print('AP at iou {}: {}'.format(iou, lvisEval._summarize('ap', iou_thr=iou)))
                else:
                    # Dict outputs: evaluate each named output separately.
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = 'xxx' + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file, dump=False)
                        coco_eval(result_files, eval_types, dataset.coco)
def _dist_train(model, dataset, cfg, validate=False):
    """Launch distributed training with a ``Mask`` pruning pass.

    The model is wrapped in ``MMDistributedDataParallel``, a ``Runner`` is
    built around it, and (optionally) an evaluation hook is registered.
    Before the training loop starts a ``Mask`` object is initialised from
    ``cfg.prun`` and applied once; the same object is then handed to
    ``runner.run`` together with ``cfg``.

    Args:
        model: Detector to train; moved to GPU with ``.cuda()``.
        dataset: Training dataset passed to ``build_dataloader``.
        cfg: Config object. Reads ``data``, ``optimizer``,
            ``optimizer_config``, ``lr_config``, ``checkpoint_config``,
            ``log_config``, ``work_dir``, ``log_level``, ``resume_from``,
            ``load_from``, ``prun.rate_norm``, ``prun.rate_dist``,
            ``workflow`` and ``total_epochs``.
        validate: When true, register an evaluation hook built from
            ``cfg.data.val``.
    """
    # A single distributed loader, wrapped in a list as the runner expects.
    train_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        dist=True)
    data_loaders = [train_loader]

    # Move the model to GPU and wrap it for distributed execution.
    model = MMDistributedDataParallel(model.cuda())

    # Optimizer and runner.
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # Standard training hooks plus the per-epoch sampler seed hook.
    grad_hook = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, grad_hook,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    if validate:
        val_cfg = cfg.data.val
        # Pick the hook class first, then register once.
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            eval_hook_cls = CocoDistEvalRecallHook
        elif issubclass(getattr(datasets, val_cfg.type),
                        datasets.CocoDataset):
            eval_hook_cls = CocoDistEvalmAPHook
        else:
            eval_hook_cls = DistEvalmAPHook
        runner.register_hook(eval_hook_cls(val_cfg))

    # Resuming takes precedence over loading bare weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    # Pruning setup: build the mask helper and apply it once up front.
    pruner = Mask(model)
    pruner.init_length()

    dashes = "-" * 10
    print(dashes + "one epoch begin" + dashes)
    norm_rate = cfg.prun.rate_norm
    dist_rate = cfg.prun.rate_dist
    print("remaining ratio of pruning : Norm is %f" % norm_rate)
    print("reducing ratio of pruning : Distance is %f" % dist_rate)
    print("total remaining ratio is %f" % (norm_rate - dist_rate))

    pruner.model = model
    pruner.init_mask(norm_rate, dist_rate, cfg)
    pruner.do_mask()
    pruner.do_similar_mask()
    model = pruner.model
    pruner.if_zero()

    # Prune mode: this runner takes the mask helper and cfg as extra
    # arguments. (Normal mode would be
    # ``runner.run(data_loaders, cfg.workflow, cfg.total_epochs)``.)
    runner.run(data_loaders, pruner, cfg.workflow, cfg.total_epochs, cfg)
def _dist_train(model, dataset, cfg, validate=False, multitask=False,
                vis=False):
    """Launch distributed (optionally multi-task) training.

    Args:
        model: Model to train; moved to GPU and wrapped in
            ``MMDistributedDataParallel``.
        dataset: A single dataset, or an iterable of datasets when
            ``multitask`` is true.
        cfg: Config object. Reads ``data``, ``optimizer``,
            ``optimizer_config``, ``lr_config``, ``checkpoint_config``,
            ``log_config``, ``work_dir``, ``log_level``, ``resume_from``,
            ``load_from``, ``workflow``, ``total_epochs`` and the optional
            keys ``fp16``, ``evaluation`` and ``init_asso_head``.
        validate: When true, register evaluation hooks for ``cfg.data.val``
            (a single dataset config or a list/tuple of them).
        multitask: When true, build one loader per dataset (grouped in a
            single inner list) and use ``MTLRunner`` instead of ``Runner``.
        vis: When true, use ``batch_processor_with_vis`` as the batch
            processor.
    """

    def _build_loader(ds):
        # BDDVideo datasets get half the per-GPU batch size and workers.
        if isinstance(ds, datasets.BDDVideo):
            imgs_per_gpu = cfg.data.imgs_per_gpu // 2
            workers_per_gpu = cfg.data.workers_per_gpu // 2
        else:
            imgs_per_gpu = cfg.data.imgs_per_gpu
            workers_per_gpu = cfg.data.workers_per_gpu
        return build_dataloader(ds, imgs_per_gpu, workers_per_gpu, dist=True)

    # prepare data loaders
    if multitask:
        data_loaders = [[_build_loader(d) for d in dataset]]
    else:
        data_loaders = [_build_loader(dataset)]

    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    bp = batch_processor_with_vis if vis else batch_processor
    runner_cls = MTLRunner if multitask else Runner
    runner = runner_cls(model, bp, optimizer, cfg.work_dir, cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            # Normalise to a sequence so one or many val sets share a path.
            if not isinstance(val_dataset_cfg, (list, tuple)):
                val_dataset_cfg = [val_dataset_cfg]
            for _cfg in val_dataset_cfg:
                dataset_type = getattr(datasets, _cfg.type)
                if issubclass(dataset_type, (datasets.BddStreet,
                                             datasets.BddSemanticSeg)):
                    runner.register_hook(BddSegEvalHook(_cfg, **eval_cfg))
                elif issubclass(dataset_type, datasets.CocoDataset):
                    runner.register_hook(
                        CocoDistEvalmAPHook(_cfg, **eval_cfg))
                elif issubclass(dataset_type, datasets.BDDVideo):
                    # This hook is built without the ``evaluation`` kwargs.
                    runner.register_hook(BDDEvalHook(_cfg))
                else:
                    runner.register_hook(DistEvalmAPHook(_cfg, **eval_cfg))

    # NOTE(review): cfg.log_config was already passed to
    # register_training_hooks above; confirm this second registration is
    # intended. Nesting level was reconstructed from flattened source.
    runner.register_logger_hooks(cfg.log_config)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    if cfg.get('init_asso_head', False):
        ori_key = cfg.init_asso_head[0]
        new_key = cfg.init_asso_head[1]
        # Build the state dict once instead of on every lookup.
        state = model.module.state_dict()
        for _key, target in state.items():
            if 'asso' not in _key:
                continue
            exist_key = _key.replace(ori_key, new_key)
            if exist_key not in state:
                continue
            if dist.get_rank() == 0:
                print('Init "{}" with "{}"'.format(_key, exist_key))
            # In-place copy on every rank.
            # NOTE(review): placement outside the rank-0 check was
            # reconstructed from flattened source — confirm.
            target.copy_(state[exist_key])

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False, train_dataset2=None):
    """Launch distributed training, optionally with a second training set.

    Args:
        model: Model to train; moved to GPU and wrapped in
            ``MMDistributedDataParallel``.
        dataset: Primary training dataset.
        cfg: Config object. Reads ``data``, ``optimizer``,
            ``optimizer_config``, ``lr_config``, ``checkpoint_config``,
            ``log_config``, ``work_dir``, ``log_level``, ``resume_from``,
            ``load_from``, ``workflow``, ``total_epochs`` and, depending on
            ``cfg.data.val.type``, ``interval``, ``data2``,
            ``data2_2scales`` or ``data3_3scales``.
        validate: When true, register an evaluation hook chosen from the
            model type and ``cfg.data.val.type``.
        train_dataset2: Optional second training dataset. When given, its
            loader is placed before the loader of ``dataset``.
    """
    # Loader order matters: the secondary dataset (if any) comes first.
    if train_dataset2 is None:
        train_sets = [dataset]
    else:
        train_sets = [train_dataset2, dataset]
    data_loaders = [
        build_dataloader(
            train_set,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            dist=True) for train_set in train_sets
    ]

    # Move the model to GPU and wrap it for distributed execution.
    model = MMDistributedDataParallel(model.cuda())

    # The runner receives the optimizer config directly.
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)

    # Standard training hooks plus the sampler seed hook.
    dist_optim_hook = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, dist_optim_hook,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())

    def _make_eval_hook():
        # Build the evaluation hook for the model / validation set type.
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            return CocoDistEvalRecallHook(cfg.data.val)

        val_type = cfg.data.val.type
        if val_type == 'CocoDataset':
            return CocoDistEvalmAPHook(cfg.data.val, cfg.interval)
        if val_type == 'CocoRGBDataset':
            return CocoDistEvalmAPHookRGB(cfg.data.val)
        if val_type == 'CocoDatasetRGB2':
            return CocoDistEvalmAPHookRGB2(cfg.data.val, cfg.interval)
        if val_type == 'Coco3DDataset':
            # A second validation set, when configured, selects the
            # multi-dataset hook.
            if hasattr(cfg, 'data2') and hasattr(cfg.data2, 'val'):
                return CocoDistEvalmAPHook3DMult(cfg.data.val,
                                                 cfg.data2.val)
            return CocoDistEvalmAPHook3D(cfg.data.val, cfg.interval)
        if val_type == 'Coco3DParcelDataset':
            return CocoDistEvalmAPHook3DParcel(cfg.data.val, cfg.interval)
        if val_type == 'Coco3D2ScalesDataset':
            return CocoDistEvalmAPHook3D(
                cfg.data.val, cfg.interval, dataset2=cfg.data2_2scales.val)
        if val_type == 'Coco3D3ScalesDataset':
            return CocoDistEvalmAPHook3D(
                cfg.data.val, cfg.interval, dataset3=cfg.data3_3scales.val)
        return DistEvalmAPHook(cfg.data.val)

    if validate:
        runner.register_hook(_make_eval_hook())

    # Resuming takes precedence over loading bare weights.
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)