Example #1
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        # torch.distributed.deprecated is the pre-PyTorch-1.0 namespace;
        # later releases expose this directly as torch.distributed.init_process_group
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        test(cfg, model, args.distributed)
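Note: entry points like this one are normally started with PyTorch's distributed launcher, which sets WORLD_SIZE in the environment and passes --local_rank to each worker process; an illustrative invocation (paths are placeholders):

    python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --config-file configs/my_config.yaml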
Example #2
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.deprecated.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    # load() returns the checkpoint entries other than the model/optimizer/
    # scheduler state (notably "iteration"), so training resumes where it left off
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
Example #3
def train(cfg, train_dir, local_rank, distributed, logger):

    # build model
    model = build_siammot(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=False,
            find_unused_parameters=True)

    arguments = {}
    arguments["iteration"] = 0

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         train_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = build_train_data_loader(
        cfg,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    tensorboard_writer = TensorboardWriter(cfg, train_dir)

    do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, logger, tensorboard_writer)

    return model
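amp.initialize above patches the model and optimizer for mixed precision; the other half of the apex recipe, loss scaling, lives in do_train. A minimal sketch of that step, assuming apex's amp and a summed `losses` tensor (apex is archived today and torch.cuda.amp is the modern replacement; Example #11 below shows this same pattern in real context):

    optimizer.zero_grad()
    with amp.scale_loss(losses, optimizer) as scaled_losses:
        scaled_losses.backward()  # backward runs on the scaled loss
    optimizer.step()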
Example #4
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank
            # this should be removed if we update BatchNorm stats
            # broadcast_buffers=False, find_unused_parameters=True
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, local_rank)

    return model
Example #5
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.deprecated.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
Example #6
def train(cfg, local_rank, distributed, save_path='.', writer=None):
    # cfg.SOLVER.IMS_PER_BATCH = 3  # force it to 3
    model_s = build_detection_model(cfg, is_student=True)
    model_t = build_detection_model(cfg, is_teacher=True)
    device_t = torch.device('cuda:0')
    device_s = torch.device('cuda:0')
    model_s.to(device_s)
    model_t.to(device_t)
    optimizer = make_optimizer(cfg, model_s)
    scheduler = make_lr_scheduler(cfg, optimizer)
    output_dir = save_path
    save_to_disk = get_rank() == 0
    checkpointer_s = DetectronCheckpointer(cfg, model_s, optimizer, scheduler,
                                           output_dir, save_to_disk)
    checkpointer_t = DetectronCheckpointer(cfg,
                                           model_t,
                                           optimizer=None,
                                           scheduler=scheduler,
                                           save_dir=output_dir,
                                           save_to_disk=save_to_disk)
    _init_weight = 'e2e_mask_rcnn_R_50_FPN_1x.pth'
    _ = checkpointer_s.load(_init_weight, True)
    _ = checkpointer_t.load(_init_weight, True)
    sourceDataLoader = make_mt_data_loader(cfg,
                                           is_train=True,
                                           is_distributed=distributed,
                                           start_iter=0,
                                           mode='source',
                                           img_ratio=1 / 2)
    data_loader_dict = {
        'source': sourceDataLoader,
    }
    if cfg.DATASETS.NO_LABEL:
        noLabelDataLoader = make_mt_data_loader(cfg,
                                                is_train=True,
                                                is_distributed=distributed,
                                                start_iter=0,
                                                mode='no_label',
                                                img_ratio=1 / 2)

        data_loader_dict.update({'no_label': noLabelDataLoader})
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    trainer = MTtrainer(model_s, model_t, data_loader_dict, optimizer,
                        scheduler, checkpointer_s, checkpointer_t,
                        checkpoint_period, cfg)

    trainer.train()
    return model_s
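MTtrainer itself is not shown; given the student/teacher pair and the optimizer-less teacher checkpointer, it presumably applies a mean-teacher EMA update to the teacher between steps. A generic sketch of such an update, not this repository's code:

    import torch

    @torch.no_grad()
    def ema_update(model_t, model_s, alpha=0.999):
        # the teacher drifts toward the student as an exponential moving average
        for p_t, p_s in zip(model_t.parameters(), model_s.parameters()):
            p_t.mul_(alpha).add_(p_s, alpha=1 - alpha)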
Example #7
def main():
    args = parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    ### Training Setups ###
    args.run_name = get_run_name() + '_step'
    output_dir = get_output_dir(args, args.run_name, args.output_dir)
    args.cfg_filename = os.path.basename(args.config_file)
    cfg.OUTPUT_DIR = output_dir
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(
        cfg=cfg,
        local_rank=args.local_rank,
        distributed=args.distributed,
        use_tensorboard=args.use_tensorboard
    )
    if not args.skip_test:
        test(cfg, model, args.distributed)
Example #8
def train(cfg, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    warmup_layers = tuple(x for x in cfg.SOLVER.WARMUP_LAYERS if len(x) != 0)
    warmup_iters = cfg.SOLVER.WARMUP_ITERS

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        warmup_layers,
        warmup_iters
    )
    return model
Example #9
def train(cfg):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    print(model)

    model3 = predicate_model(cfg)
    model3.to(cfg.MODEL.DEVICE)
    print(model3)

    optimizer = make_optimizer(cfg, model3)
    scheduler = make_lr_scheduler(cfg, optimizer)

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model3, optimizer, scheduler,
                                         cfg.OUTPUT_DIR, save_to_disk)

    backbone_parameters = torch.load(os.getcwd() + cfg.CONFIG.backbone_weight,
                                     map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(backbone_parameters['model'], [])
    load_state_dict(model, newdict.pop("model"))

    arguments = {}
    arguments["iteration"] = 0
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=False,
        start_iter=arguments["iteration"],
    )
    predicate_train(
        model,
        model3,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model, model3
Example #10
def main(args):
    cfg.merge_from_file(args.config_file)

    num_gpus = get_num_gpus()
    DatasetCatalog = None

    # train_dataset = cfg.DATASETS.TRAIN[0]
    # paths_catalog = import_file(
    #     "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    # )
    # data = json.load(open(paths_catalog.DatasetCatalog.DATASETS[train_dataset]['ann_file']))
    # iters_per_epoch = len(data['images'])
    # cfg.defrost()
    # iters_per_epoch = math.ceil(iters_per_epoch / cfg.SOLVER.IMS_PER_BATCH)
    # cfg.SOLVER.MAX_ITER = round(args.epochs * args.scale * iters_per_epoch)
    # cfg.SOLVER.STEPS = (round(8 * args.scale * iters_per_epoch),
    #                     round(11 * args.scale * iters_per_epoch),
    #                     round(16 * args.scale * iters_per_epoch))
    # cfg.SOLVER.IMS_PER_BATCH = num_gpus * 4
    # cfg.TEST.IMS_PER_BATCH = num_gpus * 16
    # cfg.freeze()

    mkdir(cfg.OUTPUT_DIR)

    if args.vis_title is None:
        args.vis_title = os.path.basename(cfg.OUTPUT_DIR)

    logger = setup_logger("maskrcnn_benchmark", cfg.OUTPUT_DIR, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    logger.info(DatasetCatalog)

    model = network.train(cfg, args, DatasetCatalog)
    network.test(cfg, args, model=model, DatasetCatalog=DatasetCatalog)
Example #11
def forward_backward(model,
                     images,
                     targets,
                     optimizer,
                     arguments,
                     checkpointer,
                     use_hvd,
                     meters,
                     device,
                     loss_scalar,
                     no_update=False):
    loss_dict = model(images, targets)

    losses = sum(loss for loss in loss_dict.values()) * loss_scalar
    # NaN is the only float that compares unequal to itself, so this detects a NaN loss
    if losses != losses:
        logging.info('NaN encountered!')
        arguments['images'] = images
        arguments['targets'] = targets
        checkpointer.save("NaN_context_{}".format(get_rank()), **arguments)
        raise RuntimeError('NaN encountered!')

    # reduce losses over all GPUs for logging purposes
    if not use_hvd:
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)
    else:
        losses_reduced = sum(loss for loss in loss_dict.values())
        meters.update(loss=losses_reduced, **loss_dict)

    # Note: If mixed precision is not used, this ends up doing nothing
    # Otherwise apply loss scaling for mixed-precision recipe
    if not no_update:
        if device.type == 'cpu':
            losses.backward()
        else:
            if not use_hvd:
                from apex import amp
                with amp.scale_loss(losses, optimizer) as scaled_losses:
                    scaled_losses.backward()
            else:
                losses.backward()
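reduce_loss_dict is what lets rank 0 log a loss averaged over all GPUs. A sketch along the lines of maskrcnn_benchmark's implementation in engine/trainer.py (hedged; verify against your checkout):

    import torch
    import torch.distributed as dist

    def reduce_loss_dict(loss_dict):
        world_size = dist.get_world_size()
        if world_size < 2:
            return loss_dict
        with torch.no_grad():
            loss_names = sorted(loss_dict.keys())
            all_losses = torch.stack([loss_dict[k] for k in loss_names], dim=0)
            dist.reduce(all_losses, dst=0)  # sum every rank's losses onto rank 0
            if dist.get_rank() == 0:
                all_losses /= world_size    # sum -> mean
        return dict(zip(loss_names, all_losses))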
Example #12
 def load_parameters(self):
     if self.distributed:
         torch.cuda.set_device(self.local_rank)
         torch.distributed.init_process_group(
             backend="nccl", init_method="env://"
         )
         synchronize()
     self.cfg.merge_from_file(self.config_file)
     self.icwt_21_objs = "21" in self.cfg.DATASETS.TRAIN[0]
     if self.cfg.OUTPUT_DIR:
         mkdir(self.cfg.OUTPUT_DIR)
     logger = setup_logger("maskrcnn_benchmark", self.cfg.OUTPUT_DIR, get_rank())
     logger.info("Using {} GPUs".format(self.num_gpus))
     logger.info("Collecting env info (might take some time)")
     logger.info("\n" + collect_env_info())
     logger.info("Loaded configuration file {}".format(self.config_file))
     with open(self.config_file, "r") as cf:
         config_str = "\n" + cf.read()
         logger.info(config_str)
     logger.info("Running with config:\n{}".format(self.cfg))
Example #13
def _log_print(logger, *args, **kwargs):
    """
    Wrapper for MLPerf compliance logging calls.
    All arguments but 'log_all_ranks' are passed to
    mlperf_logging.mllog.
    If 'log_all_ranks' is set to True then all distributed workers will print
    logging message, if set to False then only worker with rank=0 will print
    the message.
    """
    if 'stack_offset' not in kwargs:
        kwargs['stack_offset'] = 3
    if 'value' not in kwargs:
        kwargs['value'] = None

    if kwargs.pop('log_all_ranks', False):
        log = True
    else:
        log = (get_rank() == 0)

    if log:
        logger(*args, **kwargs)
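A hedged usage sketch, assuming an mlperf_logging mllog logger (the keys below are illustrative, not taken from the snippet):

    from mlperf_logging import mllog

    mllogger = mllog.get_mllogger()
    _log_print(mllogger.event, key="epoch_num", value=3)                  # rank 0 only
    _log_print(mllogger.event, key="seed", value=42, log_all_ranks=True)  # every rank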
Example #14
 def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
     if num_replicas is None:
         if not dist.is_available():
             raise RuntimeError(
                 "Requires distributed package to be available")
         # num_replicas = dist.get_world_size()
         num_replicas = get_world_size()
     if rank is None:
         if not dist.is_available():
             raise RuntimeError(
                 "Requires distributed package to be available")
         # rank = dist.get_rank()
         rank = get_rank()
     self.dataset = dataset
     self.num_replicas = num_replicas
     self.rank = rank
     self.epoch = 0
     self.num_samples = int(
         math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
     self.total_size = self.num_samples * self.num_replicas
     self.shuffle = shuffle  # honor the constructor argument instead of hard-coding True
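For contrast with the sizes computed above, this is how a matching __iter__ typically consumes them, modeled on torch.utils.data.distributed.DistributedSampler (a sketch, not part of the snippet):

    import torch

    def __iter__(self):
        g = torch.Generator()
        g.manual_seed(self.epoch)  # different shuffle every epoch
        if self.shuffle:
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = list(range(len(self.dataset)))
        # pad by wrapping around so the index list splits evenly across replicas
        indices += indices[: self.total_size - len(indices)]
        # each rank takes a strided slice of exactly num_samples elements
        indices = indices[self.rank : self.total_size : self.num_replicas]
        return iter(indices)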
Example #15
def main(args, skip_test=False):

    cfg = c.clone()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    cfg.OUTPUT_DIR = os.path.join("output", cfg.OUTPUT_DIR, "train")
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if (not args.skip_test) and (not skip_test):
        test(cfg, model, args.distributed)
Example #16
	def __init__(self, cfg, base_dir, topk=5, ckp="cache_test_log.json"):
		self.cfg = cfg.clone()
		self.ckp = ckp
		self.topk = topk
		self.base_dir = base_dir
		if not os.path.exists(base_dir):
			os.mkdir(base_dir)

		self.num_cycle = self.cfg.NAS.TEST_CYCLE

		self.backbone_layers = sum(self.cfg.MODEL.BACKBONE.STAGE_REPEATS)
		if self.cfg.MODEL.SEG_BRANCH.SHARE_SUBNET:
			self.head_layers = len(cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS) + cfg.MODEL.SEG_BRANCH.SUBNET_DEPTH
		else:
			self.head_layers = len(cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS) + 4 * cfg.MODEL.SEG_BRANCH.SUBNET_DEPTH
		self.inter_layers = 9

		self.backbone_ss_size = len(blocks_key)
		self.head_ss_size = len(head_ss_keys)
		self.inter_ss_size = len(inter_ss_keys)

		if 'search' in cfg.MODEL.BACKBONE.CONV_BODY:
			_lcm = self.backbone_ss_size*self.head_ss_size//math.gcd(self.backbone_ss_size,self.head_ss_size)
			self.lcm = self.inter_ss_size*_lcm//math.gcd(self.inter_ss_size, _lcm)
			self.search_backbone = True
		else:
			self.lcm = self.inter_ss_size*self.head_ss_size//math.gcd(self.inter_ss_size, self.head_ss_size)
			self.search_backbone = False

		self.cache_model_list = None # type: list[TEST_MODEL]
		self.exist_cycle = -1
		self.new_model_list = None # type: list[TEST_MODEL]

		# np.int was just an alias of the builtin int (removed in NumPy 1.24)
		self.backbone_sb = np.zeros((self.backbone_ss_size, self.backbone_layers), dtype=int)
		self.head_sb = np.zeros((self.head_ss_size, self.head_layers), dtype=int)
		self.inter_sb = np.zeros((self.inter_ss_size, self.inter_layers), dtype=int)

		self.rank = get_rank()
		self.world_size = get_world_size()
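Aside: each lcm-via-gcd fold above (a * b // math.gcd(a, b)) has a direct equivalent on Python 3.9+, where math.lcm is variadic; the three-way case becomes:

    self.lcm = math.lcm(self.backbone_ss_size, self.head_ss_size, self.inter_ss_size)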
Example #17
def main():
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(config_file)
    cfg.merge_from_list(opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(config_file))
    with open(config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg,
                  local_rank,
                  distributed,
                  use_tensorboard=use_tensorboard)

    if not skip_test:
        test(cfg, model, distributed)
Example #18
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = '2, 3, 4, 0'

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
Example #19
    def train(self,
              output_dir=None,
              fine_tune_last_layers=False,
              fine_tune_rpn=False):
        if output_dir is not None:
            self.cfg.OUTPUT_DIR = output_dir
        model = build_detection_model(self.cfg)
        device = torch.device(self.cfg.MODEL.DEVICE)
        model.to(device)

        arguments = {}
        arguments["iteration"] = 0

        output_dir = self.cfg.OUTPUT_DIR

        save_to_disk = get_rank() == 0
        checkpointer = DetectronCheckpointer(self.cfg, model, None, None,
                                             output_dir, save_to_disk)

        if self.cfg.MODEL.WEIGHT.startswith(
                '/') or 'catalog' in self.cfg.MODEL.WEIGHT:
            model_path = self.cfg.MODEL.WEIGHT
        else:
            model_path = os.path.abspath(
                os.path.join(os.path.dirname(__file__), os.path.pardir,
                             os.path.pardir, os.path.pardir, os.path.pardir,
                             'Data', 'pretrained_feature_extractors',
                             self.cfg.MODEL.WEIGHT))

        extra_checkpoint_data = checkpointer.load(model_path)

        if self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES + 1 != self.cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES:
            checkpointer.model.roi_heads.box.predictor.cls_score = torch.nn.Linear(
                in_features=checkpointer.model.roi_heads.box.predictor.
                cls_score.in_features,
                out_features=self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES + 1,
                bias=True)
            checkpointer.model.roi_heads.box.predictor.bbox_pred = torch.nn.Linear(
                in_features=checkpointer.model.roi_heads.box.predictor.
                cls_score.in_features,
                out_features=(self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES + 1)
                * 4,
                bias=True)
            if hasattr(checkpointer.model.roi_heads, 'mask'):
                checkpointer.model.roi_heads.mask.predictor.mask_fcn_logits = torch.nn.Conv2d(
                    in_channels=checkpointer.model.roi_heads.mask.predictor.
                    mask_fcn_logits.in_channels,
                    out_channels=self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES +
                    1,
                    kernel_size=(1, 1),
                    stride=(1, 1))
            checkpointer.model.to(device)

        if fine_tune_last_layers:
            checkpointer.model.roi_heads.box.predictor.cls_score = torch.nn.Linear(
                in_features=checkpointer.model.roi_heads.box.predictor.
                cls_score.in_features,
                out_features=self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES + 1,
                bias=True)
            checkpointer.model.roi_heads.box.predictor.bbox_pred = torch.nn.Linear(
                in_features=checkpointer.model.roi_heads.box.predictor.
                cls_score.in_features,
                out_features=(self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES + 1)
                * 4,
                bias=True)
            if hasattr(checkpointer.model.roi_heads, 'mask'):
                checkpointer.model.roi_heads.mask.predictor.mask_fcn_logits = torch.nn.Conv2d(
                    in_channels=checkpointer.model.roi_heads.mask.predictor.
                    mask_fcn_logits.in_channels,
                    out_channels=self.cfg.MINIBOOTSTRAP.DETECTOR.NUM_CLASSES +
                    1,
                    kernel_size=(1, 1),
                    stride=(1, 1))
            # Freeze backbone layers
            for elem in checkpointer.model.backbone.parameters():
                elem.requires_grad = False
            if not fine_tune_rpn:
                # Freeze RPN layers
                for elem in checkpointer.model.rpn.parameters():
                    elem.requires_grad = False
            else:
                for elem in checkpointer.model.rpn.head.conv.parameters():
                    elem.requires_grad = False
                checkpointer.model.rpn.head.cls_logits = torch.nn.Conv2d(
                    in_channels=checkpointer.model.rpn.head.cls_logits.
                    in_channels,
                    out_channels=checkpointer.model.rpn.head.cls_logits.
                    out_channels,
                    kernel_size=(1, 1),
                    stride=(1, 1))
                checkpointer.model.rpn.head.bbox_pred = torch.nn.Conv2d(
                    in_channels=checkpointer.model.rpn.head.bbox_pred.
                    in_channels,
                    out_channels=checkpointer.model.rpn.head.bbox_pred.
                    out_channels,
                    kernel_size=(1, 1),
                    stride=(1, 1))
            # Freeze roi_heads layers with the exception of the predictor ones
            for elem in checkpointer.model.roi_heads.box.feature_extractor.parameters(
            ):
                elem.requires_grad = False
            for elem in checkpointer.model.roi_heads.box.predictor.parameters(
            ):
                elem.requires_grad = True
            if hasattr(checkpointer.model.roi_heads, 'mask'):
                for elem in checkpointer.model.roi_heads.mask.predictor.parameters(
                ):
                    elem.requires_grad = False
                for elem in checkpointer.model.roi_heads.mask.predictor.mask_fcn_logits.parameters(
                ):
                    elem.requires_grad = True
            checkpointer.model.to(device)

        checkpointer.optimizer = make_optimizer(self.cfg, checkpointer.model)
        checkpointer.scheduler = make_lr_scheduler(self.cfg,
                                                   checkpointer.optimizer)

        # Initialize mixed-precision training
        use_mixed_precision = self.cfg.DTYPE == "float16"
        amp_opt_level = 'O1' if use_mixed_precision else 'O0'
        model, optimizer = amp.initialize(checkpointer.model,
                                          checkpointer.optimizer,
                                          opt_level=amp_opt_level)

        if self.distributed:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[self.local_rank],
                output_device=self.local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )

        data_loader = make_data_loader(self.cfg,
                                       is_train=True,
                                       is_distributed=self.distributed,
                                       start_iter=arguments["iteration"],
                                       is_target_task=self.is_target_task)

        test_period = self.cfg.SOLVER.TEST_PERIOD
        if test_period > 0:
            data_loader_val = make_data_loader(
                self.cfg,
                is_train=False,
                is_distributed=self.distributed,
                is_target_task=self.is_target_task)
        else:
            data_loader_val = None

        checkpoint_period = self.cfg.SOLVER.CHECKPOINT_PERIOD
        do_train(self.cfg,
                 model,
                 data_loader,
                 data_loader_val,
                 checkpointer.optimizer,
                 checkpointer.scheduler,
                 checkpointer,
                 device,
                 checkpoint_period,
                 test_period,
                 arguments,
                 is_target_task=self.is_target_task)

        logger = logging.getLogger("maskrcnn_benchmark")
        logger.handlers = []
Example #20
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    # model_D = build_discriminator(cfg)
    model_D = model.get_discriminator()

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    # model_D.to(device)
    models = [model, model_D]

    optimizers = []
    for p in model_D.parameters():
        p.requires_grad = False
    optimizers.append(make_optimizer(cfg, model))
    for p in model_D.parameters():
        p.requires_grad = True
    optimizers.append(make_optimizer(cfg, model_D))
    schedulers = [
        make_lr_scheduler(cfg, optimizer) for optimizer in optimizers
    ]

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    models, optimizers = zip(*[
        amp.initialize(model, optimizer, opt_level=amp_opt_level)
        for model, optimizer in zip(models, optimizers)
    ])

    if distributed:
        models = [
            torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[local_rank],
                output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            ) for model in models
        ]

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointers = [
        DetectronCheckpointer(cfg, model, optimizers[0], schedulers[0],
                              output_dir, save_to_disk),
        DetectronCheckpointer(cfg, model_D, optimizers[1], schedulers[1],
                              output_dir, False),
    ]
    extra_checkpoint_data = [
        checkpointer.load(cfg.MODEL.WEIGHT) for checkpointer in checkpointers
    ]
    arguments.update(extra_checkpoint_data[0])

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    tflogger = SummaryWriter(log_dir=os.path.join(output_dir, "logs"))

    do_train(
        models,
        data_loader,
        optimizers,
        schedulers,
        checkpointers,
        device,
        checkpoint_period,
        arguments,
        tflogger,
    )

    return model
Example #21
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="data/occlusion_net_train.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)

    parser.add_argument(
        "--cometml-tag",
        dest="cometml_tag",
        default="occlusion-net",
    )

    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    #model = train(cfg, args.local_rank, args.distributed)
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device).eval()

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
Example #22
def main(args):
    seed_torch()
    info = ulti.load_json()

    num_gpus = get_num_gpus()
    args.config_file = os.path.join(
        info['training_dir'], 'e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml')

    cfg.merge_from_file(args.config_file)
    cfg.defrost()
    cfg.OUTPUT_DIR = os.path.join(info['training_dir'], args.sub_dataset)
    cfg.MODEL.WEIGHT = os.path.join(info['dataset_dir'], info['experiment'],
                                    'Detector',
                                    'Iter{}.pth'.format(info['iter']))
    cfg.SOLVER.IMS_PER_BATCH = num_gpus * 4
    cfg.TEST.IMS_PER_BATCH = num_gpus * 16
    cfg.SOLVER.BASE_LR = 0.002
    cfg.freeze()

    mkdir(cfg.OUTPUT_DIR)

    if args.sub_dataset is None:
        args.sub_dataset = ""

    if args.vis_title is None:
        args.vis_title = os.path.basename(cfg.OUTPUT_DIR)

    logger = setup_logger("maskrcnn_benchmark", cfg.OUTPUT_DIR, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    # logger.info("\n" + collect_env_info())

    DatasetCatalog = None
    train_dataset = cfg.DATASETS.TRAIN[0]
    test_dataset = cfg.DATASETS.TEST[0]
    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)

    if args.sub_dataset != "":
        DatasetCatalog = paths_catalog.DatasetCatalog

        DatasetCatalog.DATASETS[train_dataset]['img_dir'] = os.path.join(
            info['dataset_dir'], 'Images')
        DatasetCatalog.DATASETS[train_dataset]['ann_file'] = os.path.join(
            info['dataset_dir'], 'RCNN_data', 'train.json')

        DatasetCatalog.DATASETS[test_dataset]['img_dir'] = os.path.join(
            info['dataset_dir'], 'Images')
        DatasetCatalog.DATASETS[test_dataset]['ann_file'] = os.path.join(
            info['dataset_dir'], 'RCNN_data', 'test.json')

        data = json.load(
            open(DatasetCatalog.DATASETS[train_dataset]['ann_file']))
    else:
        data = json.load(
            open(paths_catalog.DatasetCatalog.DATASETS[train_dataset]
                 ['ann_file']))

    iters_per_epoch = len(data['images'])
    iters_per_epoch = math.ceil(iters_per_epoch / cfg.SOLVER.IMS_PER_BATCH)
    args.iters_per_epoch = iters_per_epoch

    cfg.defrost()
    cfg.SOLVER.MAX_ITER = round(args.epochs * args.scale * iters_per_epoch)
    cfg.SOLVER.STEPS = (round(8 * args.scale * iters_per_epoch),
                        round(11 * args.scale * iters_per_epoch),
                        round(16 * args.scale * iters_per_epoch))
    cfg.freeze()

    # logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        # logger.info(config_str)
    # logger.info("Running with config:\n{}".format(cfg))

    # logger.info(DatasetCatalog)

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    if args.train:
        args.skip_train = False
        logger.info(args)
        model = network.train(cfg, args, DatasetCatalog)

    if args.test:
        network.test(cfg, args, model=None, DatasetCatalog=DatasetCatalog)
Example #23
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )

    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    #added args for Seed detection 2 strategies
    #   parser.add_argument(
    #     "--strategy",
    #     default=1,
    #     # metavar="FILE",
    #     help="1 for strat 1 and 2 for strat 2",
    # )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    print(num_gpus)

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # cfg.merge_from_list(args.strategy)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Example #24
def main():

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # model = train(cfg, args.local_rank, args.distributed)
    model = build_detection_model(cfg)
    # add
    print(model)
    all_index = []
    for index, item in enumerate(model.named_parameters()):
        all_index.append(index)
        print(index)
        print(item[0])
        print(item[1].size())
    print("All index of the model: ", all_index)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=args.distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    # run_test(cfg, model, args.distributed)
    # pruning
    m = Mask(model)
    m.init_length()
    print("-" * 10 + "one epoch begin" + "-" * 10)
    print("remaining ratio of pruning : Norm is %f" % args.rate_norm)
    print("reducing ratio of pruning : Distance is %f" % args.rate_dist)
    print("total remaining ratio is %f" % (args.rate_norm - args.rate_dist))

    m.modelM = model
    m.init_mask(args.rate_norm, args.rate_dist)

    m.do_mask()
    m.do_similar_mask()
    model = m.modelM
    m.if_zero()
    # run_test(cfg, model, args.distributed)

    # the training loop is inlined here (instead of calling do_train) so the Mask steps are easy to apply
    # do_train(
    #     model,
    #     data_loader,
    #     optimizer,
    #     scheduler,
    #     checkpointer,
    #     device,
    #     checkpoint_period,
    #     arguments,
    # )
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        # print("Loss dict",loss_dict)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()

        # pruning: mask the gradients of pruned weights before the optimizer step
        m.do_grad_mask()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        # periodic re-pruning; 7375 iterations is one epoch at batch size 16
        # on the ~118k-image COCO train set
        if iteration % args.iter_pruned == 0 or iteration == cfg.SOLVER.MAX_ITER - 5000:
            m.modelM = model
            m.if_zero()
            m.init_mask(args.rate_norm, args.rate_dist)
            m.do_mask()
            m.do_similar_mask()
            m.if_zero()
            model = m.modelM
            if args.use_cuda:
                model = model.cuda()
            #run_test(cfg, model, args.distributed)

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
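The Mask class is external to this snippet; in soft-pruning recipes like this one, do_grad_mask() zeroes the gradients of pruned weights so optimizer.step() keeps them at zero. A generic illustration, not this repository's Mask implementation:

    def do_grad_mask(model, masks):
        # masks: per-parameter 0/1 tensors, 1 = kept weight, 0 = pruned weight
        for p, mask in zip(model.parameters(), masks):
            if p.grad is not None:
                p.grad.mul_(mask)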
Example #25
if args.distributed:
    model = torch.nn.parallel.DistributedDataParallel(
        model,
        device_ids=[args.local_rank],
        output_device=args.local_rank,
        # this should be removed if we update BatchNorm stats
        broadcast_buffers=False,
    )

arguments = {}
arguments["iteration"] = 0

output_dir = cfg.OUTPUT_DIR

save_to_disk = get_rank() == 0
checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                     output_dir, save_to_disk)

extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
arguments.update(extra_checkpoint_data)

data_loader = make_data_loader(
    cfg,
    is_train=True,
    is_distributed=args.distributed,
    start_iter=arguments["iteration"],
)

checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
Example #26
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--cls_id", type=int, default=1)

    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--patched',
                        action='store_true',
                        help='patching patterns')
    parser.add_argument('--patchfile',
                        type=str,
                        default='',
                        help='patch to be applied')

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    patched = args.patched

    patchfile = args.patchfile if patched else ""
    cls_id = args.cls_id

    if patched:
        filename = args.ckpt.split('/')[-1][:-4] + '_' + args.patchfile.split(
            '/')[-2] + '_class_' + str(cls_id)
    else:
        filename = ""

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        if "physical" in dataset_name:
            filename_i = dataset_name + '_' + filename
        else:
            filename_i = filename
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            patched=patched,
            patchfile=patchfile,
            file_name=filename_i,
            cls_id=cls_id,
        )
        synchronize()
Example #27
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--ngpu_shared_fc", type=list, default=1)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    # init_method="env://" reads MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE
    # from the environment
    if True:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl",
            init_method="env://",  #rank=args.local_rank,world_size=size
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    proc_gpus = [int(i) for i in args.ngpu_shared_fc]
    model = train(cfg, proc_gpus, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def train(cfg, local_rank, distributed):
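    # here local_rank is the list of GPU ids built in main(), not a
    # single process rank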
    model, head = build_dist_face_trainer(cfg, local_rank)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    if cfg.MODEL.USE_SYNCBN:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    # wrap trunk and head separately; device_ids takes the full list of
    # GPU ids, so each wrapper can split its batch across those devices
    model = FaceDistributedDataParallel(
        model,
        device_ids=local_rank,
        output_device=local_rank,
        # this should be removed if we update BatchNorm stats
        broadcast_buffers=False,
        chunk_sizes=None,  # e.g. [32, 56, 56, 56] to balance uneven GPUs
    )
    head_local_rank = None
    if len(local_rank) == 1:
        head_local_rank = local_rank
    head = FaceDistributedDataParallel(
        head,
        device_ids=head_local_rank,
        output_device=head_local_rank,
        # this should be removed if we update BatchNorm stats
        broadcast_buffers=False,
    )
    # chain trunk and head so one optimizer / checkpointer covers both
    model = torch.nn.Sequential(model, head)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    # head_optimizer = make_optimizer(cfg, head)
    # head_scheduler = make_lr_scheduler(cfg, head_optimizer)
    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)
    # head, head_optimizer = amp.initialize(head, head_optimizer, opt_level=amp_opt_level)

    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    # head_checkpointer = DetectronCheckpointer(
    #     cfg, head, head_optimizer, head_scheduler, output_dir, save_to_disk
    # )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    #### init transforms #####
    transforms = T.Compose([
        T.RandomCrop((cfg.INPUT.SIZE_TRAIN[0], cfg.INPUT.SIZE_TRAIN[1])),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=cfg.INPUT.RGB_MEAN, std=cfg.INPUT.RGB_STD),
    ])
    data_loader = make_face_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
        transforms=transforms,
    )
    test_period = cfg.SOLVER.TEST_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    divs_nums = cfg.SOLVER.DIVS_NUMS_PER_BATCH
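    # DIV-FC training loop; divs_nums presumably controls how many
    # sub-batches each batch is divided into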
    do_face_train_dist_DIV_FC(
        cfg,
        model,  #[model,head],
        data_loader,
        None,
        optimizer,  #[optimizer,head_optimizer],
        scheduler,  #[scheduler,head_scheduler],
        checkpointer,  #[checkpointer,head_checkpointer],
        device,
        checkpoint_period,
        test_period,
        arguments,
        divs_nums,
    )
    return model
Example #29
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/free_anchor_R-50-FPN_8gpu_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # cfg.merge_from_list(['TEST.IMS_PER_BATCH', 1])
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)

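    # swap the proposal module for a RetinaNet head before the weights are loaded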
    model.rpn = RetinaNetModule(cfg)

    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        dataset_names = cfg.DATASETS.TEST
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "NR", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders,
                                              data_loaders_val):
        inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            # box_only=cfg.MODEL.RPN_ONLY,
            box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Example #30
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--use_tensorboard",
        default=True,
        # argparse's type=bool treats any non-empty string as True, so
        # parse the value explicitly
        type=lambda s: str(s).lower() in ("true", "1", "yes"),
        help="Enable/disable tensorboard logging (enabled by default)")
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument("--log_step",
                        default=50,
                        type=int,
                        help='Number of iteration for each log')
    parser.add_argument(
        "--eval_mode",
        default="test",
        type=str,
        help='Use the defined test datasets for periodic evaluation or use a '
        'validation split. Default: "test"; alternative: "val"')
    parser.add_argument("--eval_step",
                        type=int,
                        default=15000,
                        help="Number of iterations for periodic evaluation")
    parser.add_argument(
        "--return_best",
        type=lambda s: str(s).lower() in ("true", "1", "yes"),
        default=False,
        help="If false (default), test the last model on the target; if true, "
        "test the model with the best performance on the validation set")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
Example #31
    def __init__(self,
                 dataset,
                 shuffle=True,
                 distributed=False,
                 num_replicas=None,
                 rank=None,
                 args=None,
                 cfg=None):
        self.dataset = dataset
        # this is a list of lists of names: the first level corresponds
        # to a video, the second to the names of the frames in that video
        self.video_data = dataset.video_data
        self.window_size = 1
        self.batch_size_per_gpu = 1
        self.epoch = 0
        self.shuffle = shuffle
        self.distributed = distributed
        self.is_train = True

        if args is not None:
            if hasattr(args, 'window_size'):
                self.window_size = args.window_size
            if hasattr(args, 'is_train'):
                self.is_train = args.is_train
        if cfg is not None:
            self.batch_size_per_gpu = get_batch_size_per_gpu(
                cfg.SOLVER.IMS_PER_BATCH
                if self.is_train else cfg.TEST.IMS_PER_BATCH)

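        # group each video's sorted frames into consecutive windows of
        # window_size; each window becomes one sampling unit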
        self.indices = []
        for video_id in sorted(self.video_data):
            frame_list = sorted(self.video_data[video_id])
            count = 0
            frame_ids = []
            for frame_id in sorted(frame_list):
                frame_ids.append(frame_id)
                count += 1
                if count == self.window_size:
                    self.indices.append(frame_ids)
                    frame_ids = []
                    count = 0
            # at test time, pad the final incomplete window by repeating
            # the last frame; use self.is_train since args may be None
            if not self.is_train and count > 0:
                for i in range(self.window_size):
                    frame_ids.append(frame_id)
                    count += 1
                    if count == self.window_size:
                        self.indices.append(frame_ids)
                        frame_ids = []
                        count = 0
                        break
        self.num_samples = len(self.indices)
        self.total_size = self.num_samples

        if self.distributed:
            if num_replicas is None:
                num_replicas = get_world_size()
            if rank is None:
                rank = get_rank()
            self.num_replicas = num_replicas
            self.rank = rank
            self.num_samples = int(
                math.ceil(self.num_samples * 1.0 / self.num_replicas))
            self.total_size = self.num_samples * self.num_replicas
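
A minimal sketch of the matching __iter__ (not shown in the original), assuming the standard DistributedSampler pad-and-slice scheme and that torch is imported:

    def __iter__(self):
        # deterministic shuffle keyed on the epoch, as in DistributedSampler
        if self.shuffle:
            g = torch.Generator()
            g.manual_seed(self.epoch)
            order = torch.randperm(len(self.indices), generator=g).tolist()
        else:
            order = list(range(len(self.indices)))
        if self.distributed:
            # pad so the list divides evenly, then take this rank's strided slice
            order += order[:(self.total_size - len(order))]
            order = order[self.rank:self.total_size:self.num_replicas]
        # each element is one window of frame ids
        return iter([self.indices[i] for i in order])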
Example #32
def main():
    #     apply_prior   prior_mask
    # 0        -             -
    # 1        Y             -
    # 2        -             Y
    # 3        Y             Y
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.4,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.6,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether use prior_flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    print('prior flag: {}'.format(args.prior_flag))

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    # DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)

    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    opt['word_dim'] = 300
    opt['use_thres_dic'] = 1
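    # run_test consumes 300-d word embeddings and an optional per-class
    # score-threshold dictionary, loaded per dataset below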
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        # use context managers so the pickle files are closed after loading
        with open(data_args['test_detection_file'], "rb") as f:
            test_detection = pickle.load(f, encoding='latin1')
        with open(data_args['word_embedding_file'], "rb") as f:
            word_embeddings = pickle.load(f, encoding='latin1')
        with open(data_args['threshold_dic'], "rb") as f:
            opt['thres_dic'] = pickle.load(f, encoding='latin1')
        output_file = os.path.join(output_folder, 'detection_times.pkl')
        output_file_human = os.path.join(output_folder, 'detection_human.pkl')
        output_file_object = os.path.join(output_folder,
                                          'detection_object.pkl')
        # hico_folder = os.path.join(output_folder, 'HICO')
        output_map_folder = os.path.join(output_folder, 'map')

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset.".format(dataset_name))

        run_test(model,
                 dataset_name=dataset_name,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 output_file=output_file,
                 output_file_human=output_file_human,
                 output_file_object=output_file_object,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 device=device,
                 cfg=cfg,
                 opt=opt)

        # Generate_HICO_detection(output_file, hico_folder)
        compute_hico_map(output_map_folder, output_file, 'test')
Example #33
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()