def setup_logging(name, output_dir=None):
    """
    Sets up the logging for multiple processes. Only enable the logging for the
    master process, and suppress logging for the non-master processes.
    """
    if not du.is_master_proc(du.get_world_size()):
        # Suppress logging for non-master processes.
        _suppress_print()
        logger = NllLogger(f'{name}.{du.get_rank()}')
        return logger

    logger = logging.getLogger(name)
    logging.root.handlers = []
    for handler in logger.handlers:
        logger.removeHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    plain_formatter = logging.Formatter(
        "[%(asctime)s][%(levelname)s] %(filename)s: %(lineno)3d: %(message)s",
        datefmt="%m/%d %H:%M:%S",
    )

    ch = logging.StreamHandler(stream=sys.stdout)
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(plain_formatter)
    logger.addHandler(ch)

    if output_dir:
        fh = logging.FileHandler(os.path.join(output_dir, 'log.txt'))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
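# NllLogger is used above but not defined here. Presumably it is a "null"
# logger whose records are simply discarded so that non-master processes stay
# silent. A minimal sketch under that assumption (not the source's actual class):
import logging

class NllLogger(logging.Logger):
    def __init__(self, name):
        super().__init__(name)
        # Attach a NullHandler and stop propagation so every info/debug/...
        # call on this logger produces no output.
        self.addHandler(logging.NullHandler())
        self.propagate = False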
def setup_logging(output_dir=None):
    """
    Sets up the logging for multiple processes. Only enable the logging for the
    master process, and suppress logging for the non-master processes.
    """
    # Set up logging format.
    _FORMAT = "[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s"

    if du.is_master_proc():
        # Enable logging for the master process.
        logging.root.handlers = []
    else:
        # Suppress logging for non-master processes.
        _suppress_print()

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    plain_formatter = logging.Formatter(
        "[%(asctime)s][%(levelname)s] %(filename)s: %(lineno)3d: %(message)s",
        datefmt="%m/%d %H:%M:%S",
    )

    if du.is_master_proc():
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(plain_formatter)
        logger.addHandler(ch)

    if output_dir is not None and du.is_master_proc(du.get_world_size()):
        filename = os.path.join(output_dir, "stdout.log")
        fh = logging.FileHandler(filename)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)
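# Both versions of setup_logging call a _suppress_print helper that is not
# shown above. A minimal sketch, assuming it simply replaces the built-in
# print with a no-op so non-master processes cannot write to stdout:
import builtins

def _suppress_print():
    """Replace print() with a no-op for the calling (non-master) process."""
    def print_none(*args, **kwargs):
        pass

    builtins.print = print_none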
def build_dataloader(cfg, is_train=True, start_iter=0):
    transform = build_transform(cfg, is_train=is_train)
    dataset = build_dataset(cfg, transform=transform, is_train=is_train)

    if is_train:
        batch_size = cfg.DATALOADER.TRAIN_BATCH_SIZE
        world_size = du.get_world_size()
        rank = du.get_rank()
        if world_size != 1:
            # Every rank must build its own DistributedSampler shard.
            sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
        else:
            # Use a random sampler for single-process training.
            sampler = torch.utils.data.RandomSampler(dataset)
    else:
        batch_size = cfg.DATALOADER.TEST_BATCH_SIZE
        sampler = torch.utils.data.sampler.SequentialSampler(dataset)

    batch_sampler = torch.utils.data.sampler.BatchSampler(
        sampler=sampler, batch_size=batch_size, drop_last=False)
    if is_train:
        batch_sampler = IterationBasedBatchSampler(
            batch_sampler, num_iterations=cfg.TRAIN.MAX_ITER, start_iter=start_iter)

    data_loader = DataLoader(dataset,
                             num_workers=cfg.DATALOADER.NUM_WORKERS,
                             batch_sampler=batch_sampler,
                             pin_memory=True)
    return data_loader
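# IterationBasedBatchSampler is referenced above but not defined here. A
# minimal sketch, patterned after the sampler of the same name in
# maskrcnn-benchmark (an assumption about the source's implementation):
from torch.utils.data.sampler import BatchSampler

class IterationBasedBatchSampler(BatchSampler):
    """Re-iterate the wrapped BatchSampler until num_iterations batches are yielded."""

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration <= self.num_iterations:
            # Re-seed DistributedSampler-style samplers so shuffling changes per pass.
            if hasattr(self.batch_sampler.sampler, "set_epoch"):
                self.batch_sampler.sampler.set_epoch(iteration)
            for batch in self.batch_sampler:
                iteration += 1
                if iteration > self.num_iterations:
                    break
                yield batch

    def __len__(self):
        return self.num_iterations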
def build_dataloader(cfg, is_train=True):
    transform = build_transform(cfg, is_train=is_train)
    dataset = build_dataset(cfg, transform=transform, is_train=is_train)

    world_size = du.get_world_size()
    num_gpus = cfg.NUM_GPUS
    rank = du.get_rank()
    if is_train:
        batch_size = cfg.DATALOADER.TRAIN_BATCH_SIZE
        if num_gpus > 1:
            sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=True)
        else:
            sampler = RandomSampler(dataset)
    else:
        batch_size = cfg.DATALOADER.TEST_BATCH_SIZE
        if num_gpus > 1:
            sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=False)
        else:
            sampler = SequentialSampler(dataset)

    data_loader = DataLoader(dataset,
                             num_workers=cfg.DATALOADER.NUM_WORKERS,
                             sampler=sampler,
                             batch_size=batch_size,
                             drop_last=False,
                             pin_memory=True)
    return data_loader
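# Usage sketch for the second build_dataloader (the cfg fields and the train()
# wrapper are assumptions): because this version has no iteration-based
# wrapper, the training loop itself must call set_epoch() on the
# DistributedSampler each epoch so that shuffling differs between epochs.
def train(cfg, num_epochs):
    train_loader = build_dataloader(cfg, is_train=True)
    for epoch in range(num_epochs):
        if isinstance(train_loader.sampler, DistributedSampler):
            train_loader.sampler.set_epoch(epoch)
        for images, targets in train_loader:
            pass  # forward / backward / optimizer step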
def build_recognizer(cfg, device):
    world_size = du.get_world_size()
    model = registry.RECOGNIZER[cfg.MODEL.RECOGNIZER.NAME](cfg).to(device=device)

    if cfg.MODEL.NORM.SYNC_BN and world_size > 1:
        logger.info("start sync BN on the process group of {}".format(du._LOCAL_RANK_GROUP))
        convert_sync_bn(model, du._LOCAL_PROCESS_GROUP)
    if cfg.MODEL.PRETRAINED != "":
        logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=device)
        logger.info("finish loading model weights")
    if world_size > 1:
        model = DDP(model, device_ids=[device], output_device=device, find_unused_parameters=True)
    return model
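# convert_sync_bn is not defined above. If the two-argument form used in
# build_recognizer simply wraps PyTorch's built-in converter, a minimal sketch
# could look like this (an assumption, not the source's helper):
import torch.nn as nn

def convert_sync_bn(model, process_group):
    # Replaces every BatchNorm descendant with a SyncBatchNorm bound to
    # process_group; for a non-BatchNorm root module the children are swapped
    # in place, so callers may ignore the return value.
    return nn.SyncBatchNorm.convert_sync_batchnorm(model, process_group)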
def build_model(cfg, gpu, map_location=None, logger=None):
    model = registry.RECOGNIZER[cfg.MODEL.RECOGNIZER.NAME](
        cfg, map_location=map_location).cuda(gpu)

    world_size = du.get_world_size()
    rank = du.get_rank()
    if cfg.MODEL.SYNC_BN and world_size > 1:
        process_group = simple_group_split(world_size, rank, 1)
        convert_sync_bn(model, process_group, gpu=gpu)
    if cfg.MODEL.PRETRAINED != "":
        if du.is_master_proc() and logger:
            logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model, logger=logger)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=map_location, rank=rank)
    if world_size > 1:
        model = DDP(model, device_ids=[gpu], output_device=gpu, find_unused_parameters=True)
    return model
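# simple_group_split is used above but not shown. A common implementation in
# public Sync-BN examples splits the world into equally sized process groups
# and returns the group containing the current rank; a sketch under that
# assumption (not necessarily the source's helper):
import numpy as np
import torch.distributed as dist

def simple_group_split(world_size, rank, num_groups):
    groups = []
    rank_list = np.split(np.arange(world_size), num_groups)
    rank_list = [list(map(int, ranks)) for ranks in rank_list]
    for ranks in rank_list:
        # Every process must create every group, even ones it does not belong to.
        groups.append(dist.new_group(ranks=ranks))
    group_size = world_size // num_groups
    return groups[rank // group_size]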