Example no. 1
0
def test_train_model():
    """Smoke-test ``train_model`` under several training configurations.

    Runs normal training, validation, fp16, omnisource and precise-BN
    variants, each inside its own temporary work directory.
    """
    model = ExampleModel()
    dataset = ExampleDataset()
    datasets = [ExampleDataset(), ExampleDataset()]
    # Base config; the sections below mutate it in place before each run.
    cfg = {
        'seed': 0,
        'gpus': 1,
        'gpu_ids': [0],
        'resume_from': None,
        'load_from': None,
        'workflow': [('train', 1)],
        'total_epochs': 5,
        'evaluation': {'interval': 1, 'key_indicator': 'acc'},
        'data': {
            'videos_per_gpu': 1,
            'workers_per_gpu': 0,
            'val': {'type': 'ExampleDataset'},
        },
        'optimizer': {
            'type': 'SGD',
            'lr': 0.01,
            'momentum': 0.9,
            'weight_decay': 0.0001,
        },
        'optimizer_config': {'grad_clip': {'max_norm': 40, 'norm_type': 2}},
        'lr_config': {'policy': 'step', 'step': [40, 80]},
        'omnisource': False,
        'precise_bn': False,
        'checkpoint_config': {'interval': 1},
        'log_level': 'INFO',
        'log_config': {'interval': 20, 'hooks': [{'type': 'TextLoggerHook'}]},
    }

    with tempfile.TemporaryDirectory() as work_dir:
        # normal train
        cfg['work_dir'] = work_dir
        train_model(model, dataset, Config(cfg))

    with tempfile.TemporaryDirectory() as work_dir:
        # train with validation
        cfg['work_dir'] = work_dir
        train_model(model, dataset, Config(cfg), validate=True)

    with tempfile.TemporaryDirectory() as work_dir:
        # train with Fp16OptimizerHook
        cfg['work_dir'] = work_dir
        cfg['fp16'] = {'loss_scale': 512.}
        model.fp16_enabled = None
        train_model(model, dataset, Config(cfg))

    with tempfile.TemporaryDirectory() as work_dir:
        # omnisource training takes a list of datasets
        cfg['work_dir'] = work_dir
        cfg['omnisource'] = True
        train_model(model, datasets, Config(cfg))

    with tempfile.TemporaryDirectory() as work_dir:
        # train with precise_bn on
        cfg['work_dir'] = work_dir
        cfg['precise_bn'] = {'num_iters': 1, 'interval': 1}
        train_model(model, datasets, Config(cfg))
Example no. 2
0
 def Train(self):
     """Run training with the prepared model/datasets/config, then write
     an inference config next to the training outputs.

     Reads everything from ``self.system_dict`` (populated by ``setup()``):
     ``local.model``, ``local.datasets`` and ``local.cfg`` for training,
     plus ``params`` for the distributed flag and inference-config path.
     """
     self.setup()
     print("Starting to train ...")
     # NOTE(review): `validate` receives params["val_dataset"] directly —
     # presumably its truthiness is what matters (None disables validation);
     # confirm train_model does not expect a strict bool here.
     train_model(self.system_dict["local"]["model"],
                 self.system_dict["local"]["datasets"],
                 self.system_dict["local"]["cfg"],
                 distributed=self.system_dict["params"]["distributed"],
                 validate=self.system_dict["params"]["val_dataset"])
     print("Done")
     print("Creating inference config file")
     # Re-load the inference template, patch the class count to match this
     # run, and dump it into the training work_dir for later deployment.
     cfg_infer = Config.fromfile(self.system_dict["params"]["inference_config_file"])
     cfg_infer.model.cls_head.num_classes = self.system_dict["params"]["num_classes"]
     cfg_infer.dump(self.system_dict["local"]["cfg"].work_dir + "/config.py")
     print("Done")
Example no. 3
0
def main():
    """Training entry point: build config, environment, model and datasets,
    then hand off to ``train_model``.

    Order matters throughout: the distributed environment is initialised
    before the logger, and the work_dir is created before the config dump
    and log file are written into it.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # NOTE(review): no None-guard before merging, unlike the guarded
    # `update_config` pattern elsewhere — presumably parse_args() defaults
    # cfg_options to an empty dict; confirm against the argparse setup.
    cfg.merge_from_dict(args.cfg_options)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # CLI > config file > default (base filename)
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        # Default to a single GPU when neither --gpu-ids nor --gpus is given.
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # The flag is used to determine whether it is omnisource training
    cfg.setdefault('omnisource', False)

    # The flag is used to register module's hooks
    cfg.setdefault('module_hooks', [])

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    # Recorded even when None so the checkpoint meta reflects the actual run.
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['config_name'] = osp.basename(args.config)
    meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

    model = build_model(cfg.model,
                        train_cfg=cfg.get('train_cfg'),
                        test_cfg=cfg.get('test_cfg'))

    # Optional hooks (from cfg.module_hooks) are attached to the backbone only.
    register_module_hooks(model.backbone, cfg.module_hooks)

    if cfg.omnisource:
        # If omnisource flag is set, cfg.data.train should be a list
        assert type(cfg.data.train) is list
        datasets = [build_dataset(dataset) for dataset in cfg.data.train]
    else:
        datasets = [build_dataset(cfg.data.train)]

    if len(cfg.workflow) == 2:
        # For simplicity, omnisource is not compatiable with val workflow,
        # we recommend you to use `--validate`
        assert not cfg.omnisource
        if args.validate:
            warnings.warn('val workflow is duplicated with `--validate`, '
                          'it is recommended to use `--validate`. see '
                          'https://github.com/open-mmlab/mmaction2/pull/123')
        # Deep-copy so downstream mutation of the val pipeline cannot leak
        # back into cfg.
        val_dataset = copy.deepcopy(cfg.data.val)
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmaction version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmaction_version=__version__ +
                                          get_git_hash(digits=7),
                                          config=cfg.text)

    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=args.validate,
                timestamp=timestamp,
                meta=meta)
Example no. 4
0
def main():
    """Training entry point: load config, build datasets/model, train.

    Mirrors the standard mm* train script: parse CLI args, merge config
    overrides, set up the (optionally distributed) environment, logging
    and run metadata, then hand off to ``train_model``.
    """
    # parse arguments
    args = parse_args()

    # load config
    cfg = Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()

    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        logger.info('NNCF config: {}'.format(cfg.nncf_config))
        meta.update(get_nncf_metadata())

    # set random seeds
    cfg.seed = args.seed
    meta['seed'] = args.seed
    # Fix: compare against None instead of truthiness, so an explicit
    # seed of 0 is honoured (0 is falsy and used to skip seeding).
    if cfg.get('seed') is not None:
        logger.info(f'Set random seed to {cfg.seed}, deterministic: {args.deterministic}')
        set_random_seed(cfg.seed, deterministic=args.deterministic)

    # build datasets
    datasets = [build_dataset(cfg.data, 'train', dict(logger=logger))]
    logger.info(f'Train datasets:\n{str(datasets[0])}')

    if len(cfg.workflow) == 2:
        if not args.no_validate:
            warnings.warn('val workflow is duplicated with `--validate`, '
                          'it is recommended to use `--validate`. see '
                          'https://github.com/open-mmlab/mmaction2/pull/123')
        # Deep-copy cfg.data so building the val split cannot mutate the
        # train configuration.
        datasets.append(build_dataset(copy.deepcopy(cfg.data), 'val', dict(logger=logger)))
        logger.info(f'Val datasets:\n{str(datasets[-1])}')

    # filter dataset labels
    if cfg.get('classes'):
        datasets = [dataset.filter(cfg.classes) for dataset in datasets]

    # build model
    model = build_model(
        cfg.model,
        train_cfg=cfg.train_cfg,
        test_cfg=cfg.test_cfg,
        class_sizes=datasets[0].class_sizes,
        class_maps=datasets[0].class_maps
    )

    # define ignore layers (weights matching these prefixes/suffixes are
    # not restored when loading a checkpoint)
    ignore_prefixes = []
    if hasattr(cfg, 'reset_layer_prefixes') and isinstance(cfg.reset_layer_prefixes, (list, tuple)):
        ignore_prefixes += cfg.reset_layer_prefixes
    ignore_suffixes = ['num_batches_tracked']
    if hasattr(cfg, 'reset_layer_suffixes') and isinstance(cfg.reset_layer_suffixes, (list, tuple)):
        ignore_suffixes += cfg.reset_layer_suffixes

    # train model
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta,
        ignore_prefixes=tuple(ignore_prefixes),
        ignore_suffixes=tuple(ignore_suffixes)
    )
Example no. 5
0
def main():
    """Training entry point: resolve config/work_dir/GPUs, set up logging
    and seeding, build the model and dataset(s), and run ``train_model``.

    Order matters: the distributed environment is initialised before the
    logger, and work_dir is created before the config dump and log file
    are written into it.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # CLI > config file > default (base filename)
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        # Default to a single GPU when neither --gpu-ids nor --gpus is given.
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    # set random seeds
    if args.seed is not None:
        # f-string for consistency with the rest of the logging in this
        # function (was str.format); the emitted message is identical.
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    # Recorded even when None so the checkpoint meta reflects the actual run.
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_model(cfg.model,
                        train_cfg=cfg.train_cfg,
                        test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # Deep-copy so downstream mutation of the val pipeline cannot leak
        # back into cfg.
        val_dataset = copy.deepcopy(cfg.data.val)
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmaction version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmaction_version=__version__,
                                          config=cfg.text)

    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=args.validate,
                timestamp=timestamp,
                meta=meta)
Example no. 6
0
# Tutorial-style script: tweak an existing `cfg` (defined earlier, outside
# this chunk), then build and train a recognizer with it.

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
# NOTE(review): the actual divisor below is 8 * 16 = 128, not 8 — the
# extra /16 is presumably a batch-size rescale; confirm against the
# linear scaling rule used by the original config.
cfg.optimizer.lr = cfg.optimizer.lr / 8 / 16
cfg.total_epochs = 15

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# We can set the log print interval to reduce the number of times the log is printed
cfg.log_config.interval = 5

# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the recognizer
model = build_model(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_model(model, datasets, cfg, distributed=False, validate=True)