Example #1
def test(model, datasets, cfg, rank):
    data_loaders = [get_loader(ds, cfg) for ds in datasets]
    runner = EpochBasedRunner(model=model,
                              optimizers_cfg=cfg.optimizers,
                              work_dir=cfg.work_dir)
    runner.load_checkpoint(cfg.load_from, load_optim=False)
    runner.run(data_loaders, cfg.workflow, 1)
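This test variant simply replays the configured workflow for a single epoch. A hypothetical single-pass workflow entry (the phase name depends on the host codebase):

workflow = [('val', 1)]  # hypothetical: one full pass over the evaluation loader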
Example #2
def train(model, datasets, cfg, rank):
    data_loaders = [get_loader(ds, cfg, 'train') for ds in datasets]
    runner = EpochBasedRunner(model=model,
                              optimizers_cfg=cfg.optimizers,
                              work_dir=cfg.work_dir)

    runner.create_gradmanager_and_optimizers()  # per-process GradManager and optimizers

    if cfg.resume_from is not None:
        runner.resume(cfg.resume_from, cfg.get('resume_optim', True))
    elif cfg.load_from is not None:
        runner.load_checkpoint(cfg.load_from, load_optim=False)
    else:
        pass  # load nothing; train from scratch

    runner.sync_model_params()

    # register some useful hooks
    runner.register_training_hooks(lr_config=cfg.lr_config,
                                   checkpoint_config=cfg.checkpoint_config,
                                   log_config=cfg.log_config)

    # register evaluation hook
    if cfg.get('evaluation', None) is not None:
        dataset = build_dataset(cfg.data.eval)
        save_path = os.path.join(cfg.work_dir, 'eval_visuals')
        log_path = os.path.join(cfg.work_dir, 'eval.log')
        runner.register_hook(
            EvalIterHook(get_loader(dataset, cfg, 'eval'),
                         save_path=save_path,
                         log_path=log_path,
                         **cfg.evaluation))

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
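The pattern above, load on rank 0 then sync_model_params, implies a broadcast of rank 0's weights to the other processes. A minimal sketch of what such a sync might look like, assuming MegEngine (suggested by the GradManager naming); dist.bcast_list_ here is an assumption about the implementation, not the runner's actual code:

import megengine.distributed as dist

def sync_model_params(model):
    # hypothetical stand-in for runner.sync_model_params():
    # copy rank 0's parameter tensors to every other rank, in place
    if dist.get_world_size() > 1:
        dist.bcast_list_(list(model.parameters()))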
Example #3
def train(model, datasets, cfg, rank):
    data_loaders = []
    for ds in datasets:
        data_loaders.append(get_loader(ds, cfg, 'train'))

    # build runner for training
    if cfg.get('total_iters', None) is not None:
        runner = IterBasedRunner(model=model,
                                 optimizers_cfg=cfg.optimizers,
                                 work_dir=cfg.work_dir)
        total_iters_or_epochs = cfg.total_iters
    else:
        runner = EpochBasedRunner(model=model,
                                  optimizers_cfg=cfg.optimizers,
                                  work_dir=cfg.work_dir)
        assert cfg.get('total_epochs', None) is not None, \
            'either total_iters or total_epochs must be set in the config'
        total_iters_or_epochs = cfg.total_epochs

    # resume and create optimizers
    if cfg.resume_from is not None:
        # resume the previous training run (model parameters and optimizer state)
        runner.resume(cfg.resume_from, cfg.get('resume_optim', False))
    elif cfg.load_from is not None:
        # train as if from scratch: the rank 0 process loads the parameters, then
        # every process creates its optimizers; the model parameters are synchronized
        # automatically when the optimizers are initialized
        runner.load_checkpoint(cfg.load_from, load_optim=False)
        runner.create_optimizers()
    else:
        # load nothing; every process simply creates its optimizers
        runner.create_optimizers()

    # register hooks
    runner.register_training_hooks(lr_config=cfg.lr_config,
                                   checkpoint_config=cfg.checkpoint_config,
                                   log_config=cfg.log_config)

    # visual hook
    if cfg.get('visual_config', None) is not None:
        cfg.visual_config['output_dir'] = os.path.join(
            cfg.work_dir, cfg.visual_config['output_dir'])
        runner.register_hook(build_from_cfg(cfg.visual_config, HOOKS))

    # evaluation hook
    if cfg.get('evaluation', None) is not None:
        dataset = build_dataset(cfg.data.eval)
        save_path = os.path.join(cfg.work_dir, 'eval_visuals')
        log_path = cfg.work_dir
        runner.register_hook(
            EvalIterHook(get_loader(dataset, cfg, 'eval'),
                         save_path=save_path,
                         log_path=log_path,
                         **cfg.evaluation))

    runner.run(data_loaders, cfg.workflow, total_iters_or_epochs)
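For reference, a hypothetical config that exercises every branch this train() reads; the key names are taken from the code above, while all concrete values and type names are purely illustrative:

# illustrative config; only the key names are grounded in train() above
work_dir = './work_dirs/example'
optimizers = dict(type='Adam', lr=1e-4)
total_iters = 300000           # its presence selects IterBasedRunner
# total_epochs = 100           # used only when total_iters is absent
load_from = None               # path to pretrained weights, or None
resume_from = None             # path to a full checkpoint, or None
resume_optim = False           # whether resume() also restores optimizer state
workflow = [('train', 1)]

lr_config = dict(policy='Step', step=[200000], gamma=0.5)
checkpoint_config = dict(interval=5000)
log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')])
visual_config = None           # or a hook dict with an 'output_dir' key
evaluation = dict(interval=5000)
data = dict(eval=dict(type='SomeEvalDataset'))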
Example #4
def test(model, datasets, cfg, rank):
    data_loaders = []
    for ds in datasets:
        data_loaders.append(get_loader(ds, cfg))

    # build epoch runner for test
    runner = EpochBasedRunner(model=model,
                              optimizers_cfg=cfg.optimizers,
                              work_dir=cfg.work_dir)

    # load from
    if cfg.load_from is not None:
        runner.load_checkpoint(cfg.load_from, load_optim=False)
        runner.create_optimizers()
    else:
        raise RuntimeError("cfg.load_from should not be None for test")

    # run 8 passes when cfg.ensemble is set (presumably an x8 self-ensemble scheme), otherwise one
    runner.run(data_loaders, cfg.workflow, 8 if cfg.ensemble else 1)
Example #5
def train(model, datasets, cfg, rank):
    data_loaders = [get_loader(ds, cfg, 'train') for ds in datasets]
    runner = EpochBasedRunner(model=model,
                              optimizers_cfg=cfg.optimizers,
                              work_dir=cfg.work_dir)

    # every process creates the GradManager and the optimizers; both are attributes of the model
    runner.create_gradmanager_and_optimizers()

    if cfg.resume_from is not None:
        # resume the previous run, i.e. restore the epoch counter as well as model
        # parameters and optimizer state. With multi-GPU training only the rank 0
        # process loads the model parameters (they are synchronized later); if
        # resume_optim is set, every process loads the optimizer state.
        runner.resume(cfg.resume_from, cfg.get('resume_optim', True))
    elif cfg.load_from is not None:
        # load pretrained weights but train as if from scratch. With multi-GPU
        # training only the rank 0 process loads the parameters (they are
        # synchronized later).
        runner.load_checkpoint(cfg.load_from, load_optim=False)
    else:
        pass  # load nothing; train from scratch

    # synchronize model parameters across all processes
    runner.sync_model_params()

    # register some useful hooks
    runner.register_training_hooks(lr_config=cfg.lr_config,
                                   checkpoint_config=cfg.checkpoint_config,
                                   log_config=cfg.log_config)

    # register evaluation hook
    if cfg.get('evaluation', None) is not None:
        dataset = build_dataset(cfg.data.eval)
        save_path = os.path.join(cfg.work_dir, 'eval_visuals')
        log_path = os.path.join(cfg.work_dir, 'eval.log')
        runner.register_hook(
            EvalIterHook(get_loader(dataset, cfg, 'eval'),
                         save_path=save_path,
                         log_path=log_path,
                         **cfg.evaluation))

    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
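All five entry points receive an explicit rank, so they are presumably called once per process. A minimal launch sketch, assuming MegEngine's dist.launcher and an mmcv-style Config; build_model and the config path are hypothetical stand-ins for the host codebase:

import megengine.distributed as dist
from mmcv import Config

@dist.launcher  # spawns one worker process per available GPU
def main():
    cfg = Config.fromfile('configs/example.py')  # hypothetical path
    rank = dist.get_rank()
    model = build_model(cfg.model)               # hypothetical builder
    datasets = [build_dataset(cfg.data.train)]
    train(model, datasets, cfg, rank)

if __name__ == '__main__':
    main()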