def train(cfg):
    """Run single/multi-GPU training driven by the project config.

    Sets up the distributed environment, seeds RNGs per local rank for
    reproducibility, builds model/criterion/optimizer/scheduler, optionally
    resumes from a checkpoint, then hands off to ``do_train``.

    Args:
        cfg: frozen project config node (yacs-style) — read for RNG_SEED,
            OUTPUT_DIR, TRAIN.RESUME and LR_SCHEDULER.IS_WARMUP.
    """
    # Set up environment.
    init_distributed_training(cfg)
    local_rank_id = get_local_rank()

    # Set random seed from configs. Offset by rank so each process draws a
    # distinct-but-deterministic stream.
    np.random.seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.manual_seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info('init start')

    # Epoch counter starts at 1 (not 0).
    arguments = {"cur_epoch": 1}

    device = get_device(local_rank_id)
    model = build_recognizer(cfg, device)
    criterion = build_criterion(cfg, device)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT_DIR, save_to_disk=True)

    if cfg.TRAIN.RESUME:
        logger.info('resume start')
        extra_checkpoint_data = checkpointer.load(map_location=device)
        if isinstance(extra_checkpoint_data, dict):
            # .get keeps the default start epoch if the checkpoint predates
            # the 'cur_epoch' field instead of raising KeyError.
            arguments['cur_epoch'] = extra_checkpoint_data.get(
                'cur_epoch', arguments['cur_epoch'])
            if cfg.LR_SCHEDULER.IS_WARMUP:
                logger.info('warmup start')
                # Rebind the (possibly reloaded) optimizer state into the
                # warmup wrapper so scheduler and optimizer stay in sync.
                if lr_scheduler.finished:
                    optimizer.load_state_dict(
                        lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(
                        lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer
                logger.info('warmup end')
        logger.info('resume end')

    data_loader = build_dataloader(cfg, is_train=True)

    logger.info('init end')
    synchronize()
    do_train(cfg, arguments,
             data_loader, model, criterion, optimizer, lr_scheduler,
             checkpointer, device)
def test(args):
    """Two-stream evaluation: load an RGB model and an RGBDiff model from
    separate configs/checkpoints, then run fused inference.

    Args:
        args: parsed CLI namespace providing rgb_config_file,
            rgbdiff_config_file, rgb_pretrained, rgbdiff_pretrained, output.
    """
    torch.backends.cudnn.benchmark = True
    logger = logging.setup_logging()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Remap all checkpoint tensors onto GPU 0 regardless of the device they
    # were saved from.
    map_location = {'cuda:0': 'cuda:0'}

    # Build the RGB stream.
    rgb_cfg = get_cfg_defaults()
    rgb_cfg.merge_from_file(args.rgb_config_file)
    rgb_cfg.DATALOADER.TEST_BATCH_SIZE = 16
    rgb_cfg.OUTPUT.DIR = args.output
    rgb_cfg.freeze()

    rgb_model = build_model(rgb_cfg, 0)
    rgb_model.eval()
    rgb_checkpointer = CheckPointer(rgb_model, logger=logger)
    rgb_checkpointer.load(args.rgb_pretrained, map_location=map_location)
    # inference(rgb_cfg, rgb_model, device)

    # Build the RGBDiff stream.
    rgbdiff_cfg = get_cfg_defaults()
    rgbdiff_cfg.merge_from_file(args.rgbdiff_config_file)
    rgbdiff_cfg.DATALOADER.TEST_BATCH_SIZE = 16
    rgbdiff_cfg.OUTPUT.DIR = args.output
    rgbdiff_cfg.freeze()

    rgbdiff_model = build_model(rgbdiff_cfg, 0)
    rgbdiff_model.eval()
    rgbdiff_checkpointer = CheckPointer(rgbdiff_model, logger=logger)
    rgbdiff_checkpointer.load(args.rgbdiff_pretrained, map_location=map_location)

    # Fused two-stream inference.
    inference(rgb_cfg, rgb_model, rgbdiff_cfg, rgbdiff_model, device)
def test(cfg):
    """Evaluate a single model described by *cfg* on GPU 0 (or CPU).

    Loads pretrained weights when cfg.MODEL.PRETRAINED is set, then runs
    ``do_evaluation``.

    Args:
        cfg: frozen project config node.
    """
    torch.backends.cudnn.benchmark = True
    logger = setup_logger('TEST')

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Remap checkpoint tensors onto GPU 0 regardless of the saving device.
    map_location = {'cuda:0': 'cuda:0'}

    model = build_model(cfg, map_location=map_location).to(device)
    if cfg.MODEL.PRETRAINED != "":
        if logger:
            logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model, logger=logger)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=map_location)

    do_evaluation(cfg, model, device)
def train(gpu, args, cfg):
    """Per-process entry point for multi-GPU training (spawned once per GPU).

    Args:
        gpu: local GPU index on this node.
        args: CLI namespace providing nr (node rank), gpus (GPUs per node),
            world_size and resume.
        cfg: frozen project config node.
    """
    # Global rank = node_rank * gpus_per_node + local_gpu.
    rank = args.nr * args.gpus + gpu
    setup(rank, args.world_size)
    logger = setup_logger(cfg.TRAIN.NAME)

    arguments = {"iteration": 0}

    torch.cuda.set_device(gpu)
    device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    # Checkpoints are saved from rank 0; remap their tensors to this rank.
    map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}

    model = build_model(cfg, gpu, map_location=map_location)
    criterion = build_criterion(cfg)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT.DIR, save_to_disk=True, logger=logger)

    if args.resume:
        if is_master_proc():
            logger.info('resume ...')
        extra_checkpoint_data = checkpointer.load(map_location=map_location, rank=rank)
        # Guard against load() returning None (no checkpoint found) or a
        # checkpoint without the 'iteration' field.
        if isinstance(extra_checkpoint_data, dict) and 'iteration' in extra_checkpoint_data:
            arguments['iteration'] = extra_checkpoint_data['iteration']
            if cfg.LR_SCHEDULER.IS_WARMUP:
                if is_master_proc():
                    logger.info('warmup ...')
                # Rebind the reloaded optimizer state into the warmup wrapper.
                if lr_scheduler.finished:
                    optimizer.load_state_dict(
                        lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer

    data_loader = build_dataloader(cfg, is_train=True, start_iter=arguments['iteration'])

    synchronize()
    do_train(args, cfg, arguments,
             data_loader, model, criterion, optimizer, lr_scheduler,
             checkpointer, device, logger)
    cleanup()
def build_recognizer(cfg, device):
    """Build the recognizer named in the config, move it to *device*,
    optionally convert to SyncBN, load pretrained weights, and wrap in DDP
    when running distributed.

    Args:
        cfg: frozen project config node.
        device: torch device (also used as DDP device id).

    Returns:
        The (possibly DDP-wrapped) model.
    """
    world_size = du.get_world_size()
    model = registry.RECOGNIZER[cfg.MODEL.RECOGNIZER.NAME](cfg).to(device=device)

    if cfg.MODEL.NORM.SYNC_BN and world_size > 1:
        # Log the group actually used for the conversion below.
        logger.info("start sync BN on the process group of {}".format(
            du._LOCAL_PROCESS_GROUP))
        convert_sync_bn(model, du._LOCAL_PROCESS_GROUP)
    if cfg.MODEL.PRETRAINED != "":
        logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=device)
        logger.info("finish loading model weights")

    if world_size > 1:
        model = DDP(model, device_ids=[device], output_device=device,
                    find_unused_parameters=True)

    return model
def build_model(cfg, gpu, map_location=None, logger=None):
    """Instantiate the configured recognizer on the given GPU.

    Applies SyncBN conversion when configured and running distributed,
    loads pretrained weights when cfg.MODEL.PRETRAINED is set, and wraps
    the model in DistributedDataParallel for multi-process runs.

    Args:
        cfg: frozen project config node.
        gpu: local GPU index to place the model on.
        map_location: optional device remapping for checkpoint loading.
        logger: optional logger; info messages only on the master process.

    Returns:
        The (possibly DDP-wrapped) model.
    """
    net = registry.RECOGNIZER[cfg.MODEL.RECOGNIZER.NAME](
        cfg, map_location=map_location).cuda(gpu)

    n_procs = du.get_world_size()
    proc_rank = du.get_rank()

    # Convert BatchNorm layers to synchronized BN across one group per node.
    if cfg.MODEL.SYNC_BN and n_procs > 1:
        bn_group = simple_group_split(n_procs, proc_rank, 1)
        convert_sync_bn(net, bn_group, gpu=gpu)

    pretrained_path = cfg.MODEL.PRETRAINED
    if pretrained_path != "":
        if du.is_master_proc() and logger:
            logger.info(f'load pretrained: {pretrained_path}')
        CheckPointer(net, logger=logger).load(
            pretrained_path, map_location=map_location, rank=proc_rank)

    if du.get_world_size() > 1:
        net = DDP(net, device_ids=[gpu], output_device=gpu,
                  find_unused_parameters=True)

    return net
def train(gpu, args, cfg):
    """Per-process training entry point (spawn target) with optional final
    evaluation on rank 0.

    Args:
        gpu: local GPU index on this node.
        args: CLI namespace providing nr, gpus, world_size, resume, stop_eval.
        cfg: frozen project config node.
    """
    # Global rank = node_rank * gpus_per_node + local_gpu.
    rank = args.nr * args.gpus + gpu
    setup(rank, args.world_size, args.gpus)
    logger = setup_logger(cfg.TRAIN.NAME)

    arguments = {"iteration": 0}
    arguments['rank'] = rank

    device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    # Checkpoints are saved from rank 0; remap their tensors to this rank.
    map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}

    model = build_model(cfg, map_location=map_location).to(device)
    if cfg.MODEL.PRETRAINED != "":
        if rank == 0 and logger:
            logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        # Weights-only loader; the training checkpointer below also tracks
        # optimizer/scheduler state.
        checkpointer = CheckPointer(model, logger=logger)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=map_location, rank=rank)
    if args.gpus > 1:
        model = DDP(model, device_ids=[gpu], find_unused_parameters=True)

    criterion = build_criterion(cfg)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT.DIR, save_to_disk=True, logger=logger)

    if args.resume:
        if rank == 0:
            logger.info('resume ...')
        extra_checkpoint_data = checkpointer.load(map_location=map_location, rank=rank)
        # Guard against load() returning None (no checkpoint found) or a
        # checkpoint without the 'iteration' field.
        if isinstance(extra_checkpoint_data, dict) and 'iteration' in extra_checkpoint_data:
            arguments['iteration'] = extra_checkpoint_data['iteration']
            if cfg.LR_SCHEDULER.WARMUP:
                if rank == 0:
                    logger.info('warmup ...')
                # Rebind the reloaded optimizer state into the warmup wrapper.
                if lr_scheduler.finished:
                    optimizer.load_state_dict(
                        lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(
                        lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer

    data_loader = build_dataloader(cfg, train=True, start_iter=arguments['iteration'],
                                   world_size=args.world_size, rank=rank)

    model = do_train(args, cfg, arguments,
                     data_loader, model, criterion, optimizer, lr_scheduler,
                     checkpointer, device, logger)

    if rank == 0 and not args.stop_eval:
        logger.info('Start final evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, device)

    cleanup()