Exemplo n.º 1
0
def run_val(cfg, model, val_data_loaders, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    if cfg.MODEL.RELATION_ON:
        iou_types = iou_types + ("relations", )
    if cfg.MODEL.ATTRIBUTE_ON:
        iou_types = iou_types + ("attributes", )

    dataset_names = cfg.DATASETS.VAL
    for dataset_name, val_data_loader in zip(dataset_names, val_data_loaders):
        inference(
            cfg,
            model,
            val_data_loader,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=None,
        )
        synchronize()
Exemplo n.º 2
0
def test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=True, is_distributed=distributed)
    meters = TensorboardLogger(
        log_dir=cfg.TENSORBOARD_EXPERIMENT,
        delimiter="  ")
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            cfg,
            data_loader_val,
            dataset_name=dataset_name,
            meters=meters,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Exemplo n.º 3
0
def eval_net(model):
    model.eval()

    output_dir = cfg.OUTPUT_DIR

    iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=False)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()

    model.train()
Exemplo n.º 4
0
def test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Exemplo n.º 5
0
def test(cfg, model, distributed):
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        dataset_names = cfg.DATASETS.TEST
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders,
                                              data_loaders_val):
        inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            maskiou_on=cfg.MODEL.MASKIOU_ON)
        synchronize()
Exemplo n.º 6
0
def run_test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Exemplo n.º 7
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )

    parser.add_argument("--checkpoint",
                        default="",
                        metavar="FILE",
                        help="path to checkpoint file")
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    distributed = (int(os.environ["WORLD_SIZE"]) > 1
                   if "WORLD_SIZE" in os.environ else False)

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, args.local_rank)
    logger.info(cfg)

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    load_from_checkpoint(cfg, model, args.checkpoint)

    data_loader_val = make_data_loader(cfg,
                                       is_train=False,
                                       is_distributed=distributed)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    inference(model,
              data_loader_val,
              iou_types=iou_types,
              box_only=cfg.MODEL.RPN_ONLY,
              device=cfg.MODEL.DEVICE)
Exemplo n.º 8
0
def main():
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(config_file)
    cfg.merge_from_list(opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Exemplo n.º 9
0
def run_test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.FCOS_ON or cfg.MODEL.RETINANET_ON
            or cfg.MODEL.GAU_ON else cfg.MODEL.RPN_ONLY,
            # changed for fcos
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            ignore_uncertain=cfg.TEST.IGNORE_UNCERTAIN,
            use_iod_for_ignore=cfg.TEST.USE_IOD_FOR_IGNORE,
            eval_standard=cfg.TEST.COCO_EVALUATE_STANDARD,
            use_last_prediction=cfg.TEST.DEBUG.USE_LAST_PREDICTION,
            evaluate_method=cfg.TEST.EVALUATE_METHOD,
            voc_iou_ths=cfg.TEST.VOC_IOU_THS,
            gt_file={
                'merge':
                cfg.TEST.MERGE_GT_FILE,
                'sub':
                DatasetCatalog.DATA_DIR + '/' +
                DatasetCatalog.DATASETS[dataset_name]["ann_file"]
            },
            use_ignore_attr=cfg.TEST.USE_IGNORE_ATTR)
        synchronize()
Exemplo n.º 10
0
def run_val(cfg, model, distributed, modelname):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        dataset_names = cfg.DATASETS.TEST
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders,
                                              data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            maskiou_on=cfg.MODEL.MASKIOU_ON)
        if result:
            file = open(os.path.join(output_folder, "eval_log"), 'a')
            file.writelines(modelname + '_AP:' +
                            str(result[0].results['segm']['AP']) + '\n')
        synchronize()
Exemplo n.º 11
0
def test_model(cfg, model, distributed, iters_per_epoch, dllogger, args):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    results = []
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            dllogger=dllogger,
            args=args)
        synchronize()
        results.append(result)
    if is_main_process():
        map_results, raw_results = results[0]
        bbox_map = map_results.results["bbox"]['AP']
        if cfg.MODEL.MASK_ON:
            segm_map = map_results.results["segm"]['AP']
        else:
            segm_map = 0.0
        dllogger.log(step=(
            cfg.SOLVER.MAX_ITER,
            cfg.SOLVER.MAX_ITER / iters_per_epoch,
        ),
                     data={
                         "BBOX_mAP": bbox_map,
                         "MASK_mAP": segm_map
                     })
        dllogger.log(step=tuple(),
                     data={
                         "BBOX_mAP": bbox_map,
                         "MASK_mAP": segm_map
                     })

        args.writer.add_scalar('BBOX_mAP', bbox_map,
                               cfg.SOLVER.MAX_ITER / iters_per_epoch + 1)
        args.writer.add_scalar('MASK_mAP', segm_map,
                               cfg.SOLVER.MAX_ITER / iters_per_epoch + 1)
Exemplo n.º 12
0
def run_test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    ignore_cls = cfg.INPUT.IGNORE_CLS
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        if not ignore_cls and 'coco' in dataset_name and cfg.WEAK.MODE and cfg.WEAK.NUM_CLASSES != 80:
            print(f"override ignore_cls -> True for {dataset_name}")
            ignore_cls = True
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            ignore_cls=ignore_cls,
        )
        synchronize()
Exemplo n.º 13
0
def run_test(cfg, model, distributed, iteration_name):
    global best_val_map, is_best_val_map, cur_val_map, writer
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name + '_' + iteration_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        results = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        if not is_main_process():
            synchronize()
            return
        if iteration_name != 'final':
            for k, v in results.results.items():
                for ki, vi in v.items():
                    if ki == 'AP':
                        cur_val_map = vi
                        if vi > best_val_map:
                            best_val_map = vi
                            is_best_val_map = True
                        else:
                            is_best_val_map = False
                    writer.add_scalar(dataset_name + '_' + k + '_' + ki, vi,
                                      int(iteration_name))
                    # print(dataset_name + '_' + k + '_' + ki, vi)
        synchronize()
Exemplo n.º 14
0
def do_test(cfg, model, distributed, threshold, **kwargs):
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = kwargs.pop('dataset_names', cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed,
                                        datasets=kwargs.pop('datasets', None))
    test_results = []
    generate_pseudo_labels = kwargs.pop('generate_pseudo_labels',
                                        cfg.TEST.GENERATE_PSEUDO_LABELS)
    if isinstance(model, AdaptionRCNN):
        model.generate_pseudo_labels = generate_pseudo_labels

    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            threshold,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            generate_pseudo_labels=generate_pseudo_labels,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            **kwargs)
        test_results.append(result)
        synchronize()
    return test_results
Exemplo n.º 15
0
def test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    global _first_test
    if _first_test:
        log_event(key=constants.EVAL_SAMPLES, value=len(data_loaders_val))
        _first_test = False

    results = []
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        # Note: this synchronize() would break async results by not allowing them
        # to actually be async
        # synchronize()
        results.append(result)
    return results
Exemplo n.º 16
0
def run_val(cfg, model, val_data_loaders, distributed, logger):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    if cfg.MODEL.RELATION_ON:
        iou_types = iou_types + ("relations", )
    if cfg.MODEL.ATTRIBUTE_ON:
        iou_types = iou_types + ("attributes", )

    dataset_names = cfg.DATASETS.VAL
    val_result = []
    for dataset_name, val_data_loader in zip(dataset_names, val_data_loaders):
        dataset_result = inference(
                            cfg,
                            model,
                            val_data_loader,
                            dataset_name=dataset_name,
                            iou_types=iou_types,
                            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                            device=cfg.MODEL.DEVICE,
                            expected_results=cfg.TEST.EXPECTED_RESULTS,
                            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                            output_folder=None,
                            logger=logger,
                        )
        synchronize()
        val_result.append(dataset_result)
    # support for multi gpu distributed testing
    gathered_result = all_gather(torch.tensor(dataset_result).cpu())
    gathered_result = [t.view(-1) for t in gathered_result]
    gathered_result = torch.cat(gathered_result, dim=-1).view(-1)
    valid_result = gathered_result[gathered_result>=0]
    val_result = float(valid_result.mean())
    del gathered_result, valid_result
    torch.cuda.empty_cache()
    return val_result
Exemplo n.º 17
0
def test(cfg, model, distributed):
    data_loader_val = make_data_loader(cfg,
                                       is_train=False,
                                       is_distributed=distributed)
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    results = inference(
        model,
        data_loader_val,
        iou_types=iou_types,
        box_only=cfg.MODEL.RPN_ONLY,
        device=cfg.MODEL.DEVICE,
        expected_results=cfg.TEST.EXPECTED_RESULTS,
        expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
    )

    # returning results
    return results
Exemplo n.º 18
0
def run_test(iter, model, args, iou_types, output_folders, dataset_names, distributed):
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        results, coco_results = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            no_eval=args.no_eval,
        )
        synchronize()
        with open(os.path.join(output_folder, 'all.txt'), 'a+') as f:
            obj = {
                'iter': iter,
                'results': results.results,
                'coco_results': coco_results,
            }
            json.dump(obj, f)
            f.write('\n')
Exemplo n.º 19
0
def do_val(model, data_loader_val, iteration, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    dataset_name = cfg.DATASETS.TEST
    iou_types = ("bbox", )
    output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", cfg.FILE,
                                 dataset_name[0], str(iteration))
    print("output folder: ", output_folder)
    mkdir(output_folder)
    res_citypersons, res_coco = inference(
        model,
        data_loader_val[0],
        dataset_name=dataset_name,
        iou_types=iou_types,
        box_only=cfg.MODEL.RPN_ONLY,
        device=cfg.MODEL.DEVICE,
        expected_results=cfg.TEST.EXPECTED_RESULTS,
        expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        output_folder=output_folder,
    )
    synchronize()
    torch.cuda.empty_cache()
    return res_citypersons, res_coco
Exemplo n.º 20
0
def run_test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    # output_folders = [None] * len(cfg.DATASETS.TEST)
    # dataset_names = cfg.DATASETS.TEST
    dataset_names = cfg.DATASETS.NAS_VAL if not cfg.NAS.TRAIN_SINGLE_MODEL else cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        if get_rank() == 0:
            print('==' * 20, 'Evaluating single model...', '==' * 20)
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                c2d_json_path=cfg.MODEL.SEG_BRANCH.JSON_PATH,
                cfg=cfg,
            )
            synchronize()
        if get_rank() == 0:
            if 'coco' in cfg.DATASETS.NAME.lower():
                print('Evaluating panoptic results on COCO...')
                os.system(
                    'sh panoptic_scripts/bash_coco_val_evaluate.sh {} | tee pq_results'
                    .format(cfg.OUTPUT_DIR))
    elif not cfg.NAS.SKIP_NAS_TEST:
        if get_rank() == 0:
            print('==' * 10, 'Start NAS testing', '==' * 10)
        timer = Timer()
        timer.tic()
        searcher = PathPrioritySearch(cfg, base_dir='./nas_test')
        searcher.generate_fair_test(
        )  # load cache results and generate new model for test
        searcher.search(model, output_folders, dataset_names, distributed)
        searcher.save_topk()
        total_time = timer.toc()
        total_time_str = get_time_str(total_time)
        if get_rank() == 0:
            print('Finish NAS testing, total time:{}'.format(total_time_str))
        os._exit(0)
    else:
        print('Skipping NAS testing...')
Exemplo n.º 21
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )

    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    #added args for Seed detection 2 strategies
    #   parser.add_argument(
    #     "--strategy",
    #     default=1,
    #     # metavar="FILE",
    #     help="1 for strat 1 and 2 for strat 2",
    # )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    print(num_gpus)

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # cfg.merge_from_list(args.strategy)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Exemplo n.º 22
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--cls_id", type=int, default=1)

    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--patched',
                        action='store_true',
                        help='patching patterns')
    parser.add_argument('--patchfile',
                        type=str,
                        default='',
                        help='patch to be applied')

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    patched = args.patched

    patchfile = args.patchfile if patched else ""
    cls_id = args.cls_id

    if patched:
        filename = args.ckpt.split('/')[-1][:-4] + '_' + args.patchfile.split(
            '/')[-2] + '_class_' + str(cls_id)
    else:
        filename = ""

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        if "physical" in dataset_name:
            filename_i = dataset_name + '_' + filename
        else:
            filename_i = filename
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            patched=patched,
            patchfile=patchfile,
            file_name=filename_i,
            cls_id=cls_id,
        )
        synchronize()
Exemplo n.º 23
0
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # with amp.scale_loss(losses, optimizer) as scaled_losses:
        #     scaled_losses.backward()
        losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Exemplo n.º 24
0
def do_face_train_triplet(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    divs_nums,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    dataset_names = cfg.DATASETS.TEST
    for iteration, (img_a, img_p, img_n, label_p,
                    label_n) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration
        img_a_list, _ = divs_tensors(device=device,
                                     tensors=img_a,
                                     targets=None,
                                     divs_nums=divs_nums)
        img_p_list, label_p_list = divs_tensors(device=device,
                                                tensors=img_p,
                                                targets=label_p,
                                                divs_nums=divs_nums)
        img_n_list, label_n_list = divs_tensors(device=device,
                                                tensors=img_n,
                                                targets=label_n,
                                                divs_nums=divs_nums)
        ####======== 拆分batch 可能对bn层有影响 ==========####
        optimizer.zero_grad()
        for img_a, img_p, img_n, label_p, label_n in zip(
                img_a_list, img_p_list, img_n_list, label_p_list,
                label_n_list):
            loss_dict = model(tensors=[img_a, img_p, img_n],
                              targets=[label_p, label_n],
                              batch=iteration,
                              total_batch=None)
            losses = sum(loss for loss in loss_dict.values())
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)
            losses /= divs_nums
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()
        optimizer.step()
        scheduler.step()
        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            if iteration > 40000:
                checkpointer.save_backbone("BACKBONE_{:07d}".format(iteration))
        #####========= data test ============#######
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            checkpointer.save_backbone("model_final")
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Exemplo n.º 25
0
def do_train(
    cfg,
    model,
    data_loader_support,
    data_loader_query,
    data_loader_val_support,
    data_loader_val_test,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    meters,
    meters_val,
):

    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    # meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader_support)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    batch_cls_json_file = cfg.MODEL.FEW_SHOT.SUP_INDICE_CLS
    with open(batch_cls_json_file, 'r') as f:
        batch_cls_sup = json.load(f)

    if cfg.MODEL.QRY_BALANCE:
        qry_cls_json_file = cfg.MODEL.QRY_INDICE_CLS
        with open(qry_cls_json_file, 'r') as f:
            batch_cls_qry = json.load(f)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    rank = dist.get_rank()
    # if is_main_process():
    #     import pdb
    #     pdb.set_trace()
    # else:
    #     return
    # for name, param in model. named_parameters():
    #     print(name, param, True if param.grad is not None else False)

    query_iterator = data_loader_query.__iter__()
    # print('len(data_loader_query):', len(data_loader_query))
    # import pdb; pdb.set_trace()
    weights_novel_all = []
    iteration_qry = 0
    for iteration, (images_sup, targets_sup, idx) in enumerate(data_loader_support, start_iter):
        if any(len(target) < 1 for target in targets_sup):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx} || targets Length={[len(target) for target in targets_sup]}")
            continue
        data_time = time.time() - end
        batch_id = batch_cls_sup[rank][iteration]

        iteration = iteration + 1
        arguments["iteration"] = iteration
        scheduler.step()
        images_sup = images_sup.to(device)
        targets_sup = [target.to(device) for target in targets_sup]
        # update weight:
        # print(targets_sup)
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        # print(iteration, idx, batch_id, targets_sup[0].extra_fields)

        weight_novel = model(images_sup, targets_sup,
                             is_support=True, batch_id=batch_id)
        # weights_novel[rank] = weight_novel
        # print('batch_id', batch_id, weight_novel[:10])
        # weight_novel = {batch_id:weight_novel}
        torch.cuda.empty_cache()

        # synchronize()
        weights_novel = [torch.empty_like(weight_novel)
                         for i in range(dist.get_world_size())]
        weights_novel = torch.cat(
            diffdist.functional.all_gather(weights_novel, weight_novel))
        # print(weights_novel[:,:10])
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        weights_novel_all.append(weights_novel)
        # # print(weights_novel_all)
        # print(torch.cat(weights_novel_all).size())
        # print(torch.cat(weights_novel_all)[:,:10])
        # (torch.cat(gather_list) * torch.cat(gather_list)).mean().backward()
        # print(weights_novel)
        if iteration % iter_size == 0:
            optimizer.zero_grad()
            losses_reduced = 0
            loss_dict_all = {}
            for i in range(iter_size_qry):
                images_qry, targets_qry, idx = query_iterator.next()
                images_qry = images_qry.to(device)
                targets_qry = [target.to(device) for target in targets_qry]
                if cfg.MODEL.QRY_BALANCE:
                    batch_id_qry = batch_cls_qry[rank][iteration_qry]
                    iteration_qry += 1
                    loss_dict = model(images_qry, targets_qry,
                                      is_query=True, batch_id=batch_id_qry, weights_novel=torch.cat(weights_novel_all))
                else:
                    loss_dict = model(images_qry, targets_qry,
                                      is_query=True, weights_novel=torch.cat(weights_novel_all))
                # if is_main_process():
                #     print('loss_dict', loss_dict)
                losses = sum(loss for loss in loss_dict.values()
                             ) / iter_size_qry
                # losses.backward(retain_graph=True)
                with amp.scale_loss(losses, optimizer) as scaled_losses:
                    scaled_losses.backward(retain_graph=True)
                torch.cuda.empty_cache()
                loss_dict_all = add_dict(loss_dict_all, loss_dict)
            loss_dict_all = avg_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_all', loss_dict_all)
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_reduced', loss_dict_reduced)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            # losses_dict_reduced = add_dict(losses_dict_reduced, loss_dict_reduced)

            meters.update(iteration / iter_size_qry, loss=losses_reduced,
                          lr=optimizer.param_groups[0]["lr"], **loss_dict_reduced)

            weights_novel_all = []

            # (weights_novel * weights_novel).mean().backward()
            # for name, param in model. named_parameters():
            # if 'backbone' not in name:
            # print(name, True if param.grad is not None else False)
            optimizer.step()
            batch_time = time.time() - end
            end = time.time()
            meters.update(iteration, time=batch_time, data=data_time)
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            torch.cuda.empty_cache()
        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val_support is not None and test_period > 0 and iteration % test_period == 0:
            # meters_val = MetricLogger(delimiter="  ")
            synchronize()
            # """
            model.train()
            with torch.no_grad():
                weights_novel_val_sup_all = []
                current_classifier_novel = torch.zeros(
                    [iter_size * nGPU, 1024]).to(device)
                # print(current_classifier_novel)
                avg_steps = 0
                for iteration_val_sup, (images_val_sup, targets_val_sup, idx_val_sup) in enumerate(tqdm(data_loader_val_support)):
                    if any(len(target) < 1 for target in targets_val_sup):
                        logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx_val_sup} || targets Length={[len(target) for target in targets_val_sup]}")
                        continue
                    batch_id_val_sup = batch_cls_sup[rank][int(
                        iteration_val_sup)]
                    # print(iteration_val_sup)

                    images_val_sup = images_val_sup.to(device)
                    targets_val_sup = [target.to(device)
                                       for target in targets_val_sup]
                    weight_novel_val_sup = model(images_val_sup, targets_val_sup,
                                                 is_support=True, batch_id=batch_id_val_sup)
                    # weights_novel[rank] = weight_novel_val_sup
                    # print(weight_novel_val_sup.size())
                    # print('before', weight_novel_val_sup)
                    # print('batch_id', batch_id, weight_novel_val_sup[:10])
                    # weight_novel_val_sup = {batch_id:weight_novel_val_sup}
                    torch.cuda.empty_cache()

                    # synchronize()
                    weights_novel_val_sup = [torch.empty_like(weight_novel_val_sup)
                                             for i in range(dist.get_world_size())]
                    dist.all_gather(weights_novel_val_sup,
                                    weight_novel_val_sup)
                    # weights_novel_val_sup = torch.cat(
                    #     all_gather(weight_novel_val_sup))
                    # print('after', weights_novel_val_sup)
                    # print(idx, weights_novel_val_sup)
                    # print(weights_novel_val_sup[:,:10])
                    # if is_main_process():
                    #     import pdb
                    #     pdb.set_trace()
                    # else:
                    #     return
                    weights_novel_val_sup_all.append(
                        torch.cat(weights_novel_val_sup))
                    # print('length', len(weights_novel_val_sup_all))

                    if (iteration_val_sup + 1) % iter_size_qry == 0:
                        # print(torch.cat(weights_novel_val_sup_all).size())
                        # weights_novel_val_sup_all = []
                        avg_steps += 1
                        # print('current_classifier_novel', current_classifier_novel)
                        # print('weights_novel_val_sup_all', weights_novel_val_sup_all)
                        current_classifier_novel = current_classifier_novel + \
                            torch.cat(weights_novel_val_sup_all)
                        weights_novel_val_sup_all = []

                # if is_main_process():
                #     import pdb
                #     pdb.set_trace()
                # else:
                #     return
                # print(iteration_val_sup)
                current_classifier_novel_avg = current_classifier_novel / avg_steps
                model.module.roi_heads.box.cls_weights = torch.cat([model.module.roi_heads.box.predictor.cls_score.weight,
                                                                    current_classifier_novel_avg])
                # """
            output_folder = os.path.join(cfg.OUTPUT_DIR, "Validation")
            mkdir(output_folder)
            np.save(os.path.join(output_folder, 'cls_weights_'+str(iteration / iter_size_qry)), np.array(model.module.roi_heads.box.cls_weights.cpu().data))

            res_infer = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                iteration / iter_size,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=(
                    get_world_size() > 1), is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            # import pdb; pdb.set_trace()
            if res_infer:
                meters_val.update(iteration / iter_size, **res_infer)

            synchronize()
            # print('eval')
            model.train()

            """
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val_test)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(
                        iteration / iter_size, loss=losses_reduced, **loss_dict_reduced)
            """
            synchronize()
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration / iter_size,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
#             """
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            # import json
            # json.dump(model.module.roi_heads.box.cls_weights, open(os.path.join(output_folder, 'cls_weights.json'), 'w'))

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
Exemplo n.º 26
0
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Exemplo n.º 27
0
def main():
    args = parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.merge_from_list(["MODEL.WEIGHT", args.weight])

    output_dir = os.path.dirname(cfg.MODEL.WEIGHT)
    cfg.OUTPUT_DIR = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.MODEL.WEIGHT)
    _ = checkpointer.load(cfg.MODEL.WEIGHT, cfg.TRAIN.IGNORE_LIST)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    # default `log_dir` is "runs" - we'll be more specific here
    # tb_writer = SummaryWriter('runs/6dvnet_test_3d_1')

    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # dataiter = iter(data_loader_val)
        # images, bbox, labels = dataiter.next()

        # create grid of images
        # img_grid = make_grid(images.tensors)

        # show images
        # matplotlib_imshow(img_grid, one_channel=False)

        # write to tensorboard
        # tb_writer.add_image('6dvnet_test_3d_1', img_grid)
        #
        # tb_writer.add_graph(model, images.tensors)
        # tb_writer.close()

        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            cfg=cfg,
        )
        synchronize()
Exemplo n.º 28
0
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()

    rejsp = pathjoin(cfg.OUTPUT_DIR,
                     "inference/%s/bbox.json" % cfg.DATASETS.TEST[0])

    from maskrcnn_benchmark.config.paths_catalog import DatasetCatalog
    annjsp = pathjoin(
        args.data_root, *DatasetCatalog.DATASETS[cfg.DATASETS.TEST[0]]
        ['ann_file'].split('/')[1:])
    method = basename(cfg.OUTPUT_DIR)
Exemplo n.º 29
0
def train(cfg, local_rank, distributed, d_path=None):

    MaskDnet = MaskDiscriminator(nc=256)
    BBoxDnet = BoxDiscriminator(nc=256, ndf=64)
    Dnet = CombinedDiscriminator(MaskDnet, BBoxDnet)
    model = Mask_RCNN(cfg)
    g_rcnn = GAN_RCNN(model, Dnet)

    device = torch.device(cfg.MODEL.DEVICE)
    g_rcnn.to(device)

    g_optimizer = make_optimizer(cfg, model)
    d_optimizer = make_D_optimizer(cfg, Dnet)

    g_scheduler = make_lr_scheduler(cfg, g_optimizer)
    d_scheduler = make_lr_scheduler(cfg, d_optimizer)
    # model.BoxDnet = BBoxDnet

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, g_optimizer = amp.initialize(model, g_optimizer, opt_level=amp_opt_level)
    Dnet, d_optimizer = amp.initialize(Dnet, d_optimizer, opt_level=amp_opt_level)

    if distributed:
        g_rcnn = torch.nn.parallel.DistributedDataParallel(
                    g_rcnn, device_ids=[local_rank], output_device=local_rank,
                    # this should be removed if we update BatchNorm stats
                    broadcast_buffers=False,
                )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, g_optimizer, g_scheduler, output_dir, save_to_disk
    )

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)

    arguments.update(extra_checkpoint_data)

    d_checkpointer = DetectronCheckpointer(
        cfg, Dnet, d_optimizer, d_scheduler, output_dir, save_to_disk
    )

    if d_path:
        d_checkpointer.load(d_path, use_latest=False)

    data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )

    test_period = cfg.SOLVER.TEST_PERIOD
    data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=distributed, is_for_period=True)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    ## START TRAINING
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")

    meters = TensorboardLogger(
            log_dir=cfg.OUTPUT_DIR + "/tensorboardX",
            start_iter=arguments['iteration'],
            delimiter="  ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    g_rcnn.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        g_loss_dict, d_loss_dict = g_rcnn(images, targets)

        g_losses = sum(loss for loss in g_loss_dict.values())
        d_losses = sum(loss for loss in d_loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        g_loss_dict_reduced = reduce_loss_dict(g_loss_dict)
        g_losses_reduced = sum(loss for loss in g_loss_dict_reduced.values())
        
        d_loss_dict_reduced = reduce_loss_dict(d_loss_dict)
        d_losses_reduced = sum(loss for loss in d_loss_dict_reduced.values())
        
        meters.update(total_g_loss=g_losses_reduced, **g_loss_dict_reduced)
        meters.update(total_d_loss=d_losses_reduced, **d_loss_dict_reduced)

        g_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(g_losses, g_optimizer) as g_scaled_losses:
            g_scaled_losses.backward()
        g_optimizer.step()
        g_scheduler.step()
        
        
        d_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(d_losses, d_optimizer) as d_scaled_losses:
            d_scaled_losses.backward()
        d_optimizer.step()
        d_scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            d_checkpointer.save("dnet_{:07d}".format(iteration), **arguments)
            
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=False, is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=cfg.OUTPUT_DIR,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
Exemplo n.º 30
0
def do_train(
    cfg,
    total_model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    args,
):
    if len(total_model) > 1:
        model = total_model[1]
        t_model = total_model[0]
    else:
        model = total_model[0]
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()

    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg[0].MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg[0].MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg[0].DATASETS.TEST

    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step()
        if not pytorch_1_1_0_or_later:
            scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict, features_dict = model(images, targets)
        if len(total_model) > 1:
            with torch.no_grad():
                t_loss_dict, t_features_dict = t_model(images, targets)
            # with torch.no_grad():
            #     # teacher_model = t_model
            #     t_weight = torch.load('./weights/centermask-V-19-eSE-FPN-ms-3x.pth')
            #     t_weight = t_weight['model']
            #     new_tweight = OrderedDict()
            #     for k, v in t_weight.items():
            #         name = k[7:]  # remove `module.`
            #         new_tweight[name] = v
            #     t_model.load_state_dict(new_tweight)
            #     t_loss_dict, t_features_dict = t_model(images, targets)

        if args.loss_head:

            loss_regression = new_box_loss(t_loss_dict['loss_reg'],
                                           loss_dict['loss_reg'])
            loss_center = new_center_loss(t_loss_dict['loss_centerness'],
                                          loss_dict['loss_centerness'])
            mode = 'KL'  # mode = 'KL' or 'cross-entropy'
            loss_pixel_wise = pixel_wise_loss(features_dict['box_cls'],
                                              t_features_dict['box_cls'], mode)
            loss_head = (loss_regression + loss_center + loss_pixel_wise)
            loss_dict.setdefault('loss_head', loss_head)
            del loss_dict['loss_reg']
            del loss_dict['loss_centerness']

        if iteration > cfg[0].SOLVER.WARMUP_ITERS:
            if args.loss_correlation:
                correlation = True
                loss_corr = get_feature(t_model, model, images, targets,
                                        correlation)
                loss_dict.setdefault('loss_corr', loss_corr)
            if args.loss_featuremap:
                correlation = False
                loss_featuremap = get_feature(t_model, model, images, targets,
                                              correlation)
                loss_dict.setdefault('loss_featuremap', loss_featuremap)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if pytorch_1_1_0_or_later:
            scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0 and iteration != 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg[0],
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg[0].MODEL.MASK_ON else cfg[0].MODEL.RPN_ONLY,
                device=cfg[0].MODEL.DEVICE,
                expected_results=cfg[0].TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg[0].TEST.
                EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    if len(loss_dict) > 1:
                        loss_dict = loss_dict[0]
                    else:
                        loss_dict = loss_dict
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Exemplo n.º 31
0
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = cfg.OUTPUT_DIR
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank(), filename="testlog.txt")
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    if amp is not None:
        # Initialize mixed-precision if necessary
        use_mixed_precision = cfg.DTYPE == 'float16'
        amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    ignore_cls = cfg.INPUT.IGNORE_CLS
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        if not ignore_cls and 'coco' in dataset_name and cfg.WEAK.MODE and cfg.WEAK.NUM_CLASSES != 80:
            logger.info(f"override ignore_cls -> True for {dataset_name}")
            ignore_cls = True
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            ignore_cls=ignore_cls,
        )
        synchronize()
Exemplo n.º 32
0
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/home/asd/Project/AirplaneDetection/Gliding-vertex-Trainer/gliding_vertex-master/configs/glide/dota.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=
        "/home/asd/Project/AirplaneDetection/Gliding-vertex-Trainer/exp_dota/0928/model_0040000.pth",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # sys.path.insert(-1, '/home/asd/Project/AirplaneDetection/Gliding-vertex-Trainer/maskrcnn-benchmark/maskrcnn-benchmark')

    print(sys.path)

    from maskrcnn_benchmark.config import cfg
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    print(output_folders)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()