Example #1
def setup_logging(name, output_dir=None):
    """
    Set up logging for multiple processes: enable logging only for the
    master process and suppress it for non-master processes.
    """
    if not du.is_master_proc(du.get_world_size()):
        # Suppress logging for non-master processes.
        _suppress_print()
        logger = NllLogger(f'{name}.{du.get_rank()}')
        return logger

    logger = logging.getLogger(name)
    logging.root.handlers = []
    # Iterate over a copy: removing handlers while iterating skips entries.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    plain_formatter = logging.Formatter(
        "[%(asctime)s][%(levelname)s] %(filename)s: %(lineno)3d: %(message)s",
        datefmt="%m/%d %H:%M:%S",
    )
    ch = logging.StreamHandler(stream=sys.stdout)
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(plain_formatter)
    logger.addHandler(ch)

    if output_dir:
        fh = logging.FileHandler(os.path.join(output_dir, 'log.txt'))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
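Both this example and Example #2 below call a _suppress_print helper that is not shown in the listing. A minimal sketch, assuming its only job is to silence the built-in print on non-master processes (the name is taken from the call sites; the real helper may do more):

import builtins


def _suppress_print():
    """Replace the built-in print with a no-op so that non-master
    processes write nothing to stdout."""

    def _print_pass(*args, **kwargs):
        pass

    builtins.print = _print_pass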
Example #2
def setup_logging(output_dir=None):
    """
    Set up logging for multiple processes: enable logging only for the
    master process and suppress it for non-master processes.
    """
    # Set up logging format.
    _FORMAT = "[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s"

    if du.is_master_proc():
        # Enable logging for the master process.
        logging.root.handlers = []
    else:
        # Suppress logging for non-master processes.
        _suppress_print()

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    plain_formatter = logging.Formatter(
        "[%(asctime)s][%(levelname)s] %(filename)s: %(lineno)3d: %(message)s",
        datefmt="%m/%d %H:%M:%S",
    )

    if du.is_master_proc():
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(plain_formatter)
        logger.addHandler(ch)

    if output_dir is not None and du.is_master_proc(du.get_world_size()):
        filename = os.path.join(output_dir, "stdout.log")
        fh = logging.FileHandler(filename)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)
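A short usage sketch for this variant, which configures the root logger and returns nothing; the cfg field and the call site are assumptions, not part of this listing:

# In each worker process, after the distributed backend is initialized:
setup_logging(output_dir=cfg.OUTPUT_DIR)

# Any module can now log through the root logger; only the master process
# has handlers attached, so the other ranks stay quiet.
logger = logging.getLogger(__name__)
logger.info("start training")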
Example #3
def build_dataloader(cfg, is_train=True, start_iter=0):
    transform = build_transform(cfg, is_train=is_train)
    dataset = build_dataset(cfg, transform=transform, is_train=is_train)

    if is_train:
        batch_size = cfg.DATALOADER.TRAIN_BATCH_SIZE

        world_size = du.get_world_size()
        rank = du.get_rank()
        if world_size != 1:
            # Distributed training: every rank needs the DistributedSampler
            # so each process reads its own shard of the dataset.
            sampler = DistributedSampler(dataset,
                                         num_replicas=world_size,
                                         rank=rank)
        else:
            # Single-process training: use a random sampler.
            sampler = torch.utils.data.RandomSampler(dataset)
    else:
        batch_size = cfg.DATALOADER.TEST_BATCH_SIZE
        sampler = torch.utils.data.sampler.SequentialSampler(dataset)

    batch_sampler = torch.utils.data.sampler.BatchSampler(
        sampler=sampler, batch_size=batch_size, drop_last=False)
    if is_train:
        batch_sampler = IterationBasedBatchSampler(
            batch_sampler,
            num_iterations=cfg.TRAIN.MAX_ITER,
            start_iter=start_iter)

    data_loader = DataLoader(dataset,
                             num_workers=cfg.DATALOADER.NUM_WORKERS,
                             batch_sampler=batch_sampler,
                             pin_memory=True)

    return data_loader
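IterationBasedBatchSampler is referenced above but not defined in this listing. A common implementation (this sketch follows the widely copied maskrcnn-benchmark version, so details may differ from the project the example was taken from) keeps re-iterating the wrapped batch sampler until a fixed number of iterations has been yielded:

from torch.utils.data.sampler import BatchSampler


class IterationBasedBatchSampler(BatchSampler):
    """Wrap a BatchSampler and resample from it until num_iterations
    batches have been produced."""

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration <= self.num_iterations:
            # DistributedSampler reshuffles per "epoch"; feed it the current
            # pass count so every pass over the data gets a new ordering.
            if hasattr(self.batch_sampler.sampler, "set_epoch"):
                self.batch_sampler.sampler.set_epoch(iteration)
            for batch in self.batch_sampler:
                iteration += 1
                if iteration > self.num_iterations:
                    break
                yield batch

    def __len__(self):
        return self.num_iterations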
Example #4
def build_dataloader(cfg, is_train=True):
    transform = build_transform(cfg, is_train=is_train)
    dataset = build_dataset(cfg, transform=transform, is_train=is_train)

    world_size = du.get_world_size()
    num_gpus = cfg.NUM_GPUS
    rank = du.get_rank()
    if is_train:
        batch_size = cfg.DATALOADER.TRAIN_BATCH_SIZE

        if num_gpus > 1:
            sampler = DistributedSampler(dataset,
                                         num_replicas=world_size,
                                         rank=rank,
                                         shuffle=True)
        else:
            sampler = RandomSampler(dataset)
    else:
        batch_size = cfg.DATALOADER.TEST_BATCH_SIZE
        if num_gpus > 1:
            sampler = DistributedSampler(dataset,
                                         num_replicas=world_size,
                                         rank=rank,
                                         shuffle=False)
        else:
            sampler = SequentialSampler(dataset)

    data_loader = DataLoader(dataset,
                             num_workers=cfg.DATALOADER.NUM_WORKERS,
                             sampler=sampler,
                             batch_size=batch_size,
                             drop_last=False,
                             pin_memory=True)

    return data_loader
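Unlike Example #3, this variant hands a plain DistributedSampler straight to the DataLoader, so the training loop has to reshuffle it each epoch. A minimal usage sketch; the cfg fields and the epoch loop are assumptions:

train_loader = build_dataloader(cfg, is_train=True)

for epoch in range(cfg.TRAIN.MAX_EPOCH):
    # DistributedSampler derives its shuffling from the epoch number;
    # without set_epoch every epoch would replay the same order.
    if isinstance(train_loader.sampler, DistributedSampler):
        train_loader.sampler.set_epoch(epoch)
    for images, targets in train_loader:
        ...  # forward / backward / optimizer step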
Example #5
def build_recognizer(cfg, device):
    world_size = du.get_world_size()

    model = registry.RECOGNIZER[cfg.MODEL.RECOGNIZER.NAME](cfg).to(
        device=device)

    if cfg.MODEL.NORM.SYNC_BN and world_size > 1:
        logger.info("start sync BN on the process group of {}".format(
            du._LOCAL_PROCESS_GROUP))
        convert_sync_bn(model, du._LOCAL_PROCESS_GROUP)
    if cfg.MODEL.PRETRAINED != "":
        logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=device)
        logger.info("finish loading model weights")

    if du.get_world_size() > 1:
        model = DDP(model,
                    device_ids=[device],
                    output_device=device,
                    find_unused_parameters=True)

    return model
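convert_sync_bn is not part of this listing either. A minimal sketch built on PyTorch's own SyncBatchNorm conversion, mirroring the two-argument call above (the real helper may differ; Example #6, for instance, passes an extra gpu argument):

import torch.nn as nn


def convert_sync_bn(model, process_group=None):
    """Swap every BatchNorm*d layer for SyncBatchNorm so that running
    statistics are synchronized across the given process group."""
    return nn.SyncBatchNorm.convert_sync_batchnorm(model, process_group)

Because convert_sync_batchnorm rebuilds the BatchNorm children on their parent modules, the call above can ignore the return value as long as the root module is not itself a BatchNorm layer.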
Example #6
def build_model(cfg, gpu, map_location=None, logger=None):
    model = registry.RECOGNIZER[cfg.MODEL.RECOGNIZER.NAME](
        cfg, map_location=map_location).cuda(gpu)

    world_size = du.get_world_size()
    rank = du.get_rank()
    if cfg.MODEL.SYNC_BN and world_size > 1:
        process_group = simple_group_split(world_size, rank, 1)
        convert_sync_bn(model, process_group, gpu=gpu)
    if cfg.MODEL.PRETRAINED != "":
        if du.is_master_proc() and logger:
            logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model, logger=logger)
        checkpointer.load(cfg.MODEL.PRETRAINED,
                          map_location=map_location,
                          rank=rank)

    if du.get_world_size() > 1:
        model = DDP(model,
                    device_ids=[gpu],
                    output_device=gpu,
                    find_unused_parameters=True)

    return model
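simple_group_split is likewise not shown. A sketch of the usual pattern, under the assumption that world_size is divisible by num_groups (the real helper may differ): partition the ranks into equally sized process groups and return the group the calling rank belongs to.

import torch.distributed as dist


def simple_group_split(world_size, rank, num_groups):
    """Partition ranks 0..world_size-1 into num_groups contiguous process
    groups and return the group that contains `rank`."""
    group_size = world_size // num_groups
    # new_group is a collective call: every rank must create every group.
    groups = [
        dist.new_group(list(range(i * group_size, (i + 1) * group_size)))
        for i in range(num_groups)
    ]
    return groups[rank // group_size]

With num_groups=1, as in the call above, this yields a single group spanning every rank, so all processes synchronize their BatchNorm statistics together.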