Example #1
def test_build_batch_dataloader_inference(self):
    # Test that build_batch_data_loader can be used for inference
    N = 96
    ds = DatasetFromList(list(range(N)))
    sampler = InferenceSampler(len(ds))
    dl = build_batch_data_loader(ds, sampler, 8, num_workers=3)
    self._check_is_range(dl, N)
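For reference, the same check can be written standalone; this is a minimal sketch that assumes _check_is_range simply verifies the loader yields every element of range(N) exactly once (N is kept divisible by the batch size, since the non-grouping code path drops the last partial batch):

import itertools
from detectron2.data.build import build_batch_data_loader
from detectron2.data.common import DatasetFromList
from detectron2.data.samplers import InferenceSampler

N = 96
ds = DatasetFromList(list(range(N)))
dl = build_batch_data_loader(ds, InferenceSampler(len(ds)), 8, num_workers=0)
# Each batch is a plain list (trivial collate), so flatten and compare.
assert sorted(itertools.chain.from_iterable(dl)) == list(range(N))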
Example #2
def build_simple_dataloader(dataset_name: list, batch_size):
    dataset_dicts = get_detection_dataset_dicts(dataset_name)
    dataset = DatasetFromList(dataset_dicts, copy=False)

    cfg = get_cfg()
    cfg["aug_kwargs"] = {}  # no extra augmentations; this cfg is consumed by AlbumentationsMapper below

    dataset = MapDataset(dataset, AlbumentationsMapper(cfg, False))

    # shuffle is disabled (with a fixed seed) so batches are reproducible while debugging
    sampler = TrainingSampler(len(dataset), shuffle=False, seed=42)
    dataloader = build_batch_data_loader(dataset=dataset, sampler=sampler,
                                         total_batch_size=batch_size)

    return dataloader
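A hypothetical invocation (the dataset name is illustrative and must already be registered in DatasetCatalog; AlbumentationsMapper comes from the surrounding project, not detectron2):

import itertools
dataloader = build_simple_dataloader(["my_dataset_train"], batch_size=4)
# TrainingSampler is infinite, so bound the iteration explicitly.
for batch in itertools.islice(dataloader, 10):
    pass  # each batch is a list of mapped dataset dicts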
Example #3
def build_hand_train_loader(cfg):
    dataset_dicts, num_per_epoch = load_hand(
        cfg.HAND_PROJECT.DATA.MODE,
        cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN,
        cfg.HAND_PROJECT.DATA.BASE_PATH,
        selects=cfg.HAND_PROJECT.DATA.SELECTS,
    )
    
    dataset = DatasetFromList(dataset_dicts, copy=False)
    mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))

    # NOTE: sampler_name is only logged; this loader always uses TrainingSampler
    sampler = TrainingSampler(len(dataset))
    
    return build_batch_data_loader(dataset, sampler, cfg.SOLVER.IMS_PER_BATCH, num_workers=cfg.DATALOADER.NUM_WORKERS), num_per_epoch
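A sketch of how a caller might consume the pair this returns (the epoch arithmetic is an assumption: num_per_epoch counts images, so it is divided by the per-batch image count):

train_loader, num_per_epoch = build_hand_train_loader(cfg)
iters_per_epoch = num_per_epoch // cfg.SOLVER.IMS_PER_BATCH
# The loader is infinite; one "epoch" must be bounded manually.
for data, it in zip(train_loader, range(iters_per_epoch)):
    pass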
Example #4
def build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42, shuffle=True):
    dataset_dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)

    logger = logging.getLogger(__name__)
    logger.info("Using training sampler TrainingSampler with shuffle=False")
    sampler = TrainingSampler(len(dataset), shuffle=shuffle, seed=seed)

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
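A minimal sketch of the determinism this enables (mapper and cfg set up as in the other examples here): two loaders built with the same seed should replay the same batch order.

loader_a = build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42)
loader_b = build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42)
ids_a = [d["image_id"] for d in next(iter(loader_a))]
ids_b = [d["image_id"] for d in next(iter(loader_b))]
assert ids_a == ids_b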
Example #5
def build_weighted_detection_train_loader(cfg: CfgNode, mapper=None):
    dataset_repeat_factors = get_train_datasets_repeat_factors(cfg)
    # OrderedDict to guarantee order of values() consistent with repeat factors
    dataset_name_to_dicts = OrderedDict(
        {
            name: get_detection_dataset_dicts(
                [name],
                filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
                min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                if cfg.MODEL.KEYPOINT_ON
                else 0,
                proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
                if cfg.MODEL.LOAD_PROPOSALS
                else None,
            )
            for name in cfg.DATASETS.TRAIN
        }
    )
    # Repeat factor for every sample in the dataset
    repeat_factors = [
        [dataset_repeat_factors[dsname]] * len(dataset_name_to_dicts[dsname])
        for dsname in cfg.DATASETS.TRAIN
    ]
    repeat_factors = list(itertools.chain.from_iterable(repeat_factors))

    dataset_dicts = dataset_name_to_dicts.values()
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
    dataset = DatasetFromList(dataset_dicts, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    logger = logging.getLogger(__name__)
    logger.info(
        "Using RepeatFactorTrainingSampler with repeat_factors={}".format(
            cfg.DATASETS.TRAIN_REPEAT_FACTOR
        )
    )
    sampler = RepeatFactorTrainingSampler(torch.tensor(repeat_factors))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
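The per-sample expansion in the middle of this builder can be illustrated with toy numbers (dataset names and sizes are made up; no detectron2 required):

import itertools
dataset_repeat_factors = {"ds_a": 2.0, "ds_b": 0.5}
sizes = {"ds_a": 3, "ds_b": 2}
repeat_factors = list(itertools.chain.from_iterable(
    [dataset_repeat_factors[name]] * sizes[name] for name in ["ds_a", "ds_b"]
))
# One factor per sample, in dataset order.
assert repeat_factors == [2.0, 2.0, 2.0, 0.5, 0.5]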
Example #6
def build_x_train_loader(
    dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
):
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if sampler is None:
        sampler = TrainingSampler(len(dataset))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
    )
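Since mapper and total_batch_size are keyword-only, a call looks like this (a toy sketch; mapper=None skips the mapping step, and aspect ratio grouping is disabled because these dicts carry no "width"/"height" keys):

data = [{"image_id": i} for i in range(16)]
loader = build_x_train_loader(
    data, mapper=None, total_batch_size=4, aspect_ratio_grouping=False
)
batch = next(iter(loader))  # a list of 4 dataset dicts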
Example #7
def build_train_loader(cfg, mapper=None):
    if mapper is None:
        mapper = get_dataset_mapper(cfg.DATASETS.TRAIN[0])(cfg, True)

    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    elif sampler_name == "RatioFactorTrainingSampler":
        repeat_factors = repeat_factors_from_ratios(dataset_dicts)
        sampler = RepeatFactorTrainingSampler(repeat_factors)

    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
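Which branch runs is controlled purely by the config; e.g. to opt into class-balanced resampling (the threshold value is illustrative):

cfg.DATALOADER.SAMPLER_TRAIN = "RepeatFactorTrainingSampler"
cfg.DATALOADER.REPEAT_THRESHOLD = 0.001  # oversample categories appearing in <0.1% of images
loader = build_train_loader(cfg)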
Example #8
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open Images Dataset
        # get_detection_dataset_dicts
        all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")

        # rebalancing: keep only the sampled image ids
        image_id_vs_idx = {}
        for idx, desc in enumerate(all_descs_train):
            image_id_vs_idx[desc['image_id']] = idx
        descs_train = [all_descs_train[image_id_vs_idx[img_id]] for img_id in sample_image_ids()]
        print('_' * 50 + f'train dataset len: {len(descs_train)}')

        ds_train = DatasetFromList(descs_train, copy=False)

        # DatasetMapper with photometric augmentations
        augs = [RandomContrast(0.8, 1.2),
                RandomBrightness(0.8, 1.2),
                RandomSaturation(0.8, 1.2)]
        augs.extend(build_augmentation(cfg, is_train=True))
        mapper = make_mapper('oid_train', is_train=True, augmentations=T.AugmentationList(augs))
    ds_train = MapDataset(ds_train, mapper)

    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
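The module-level DATA_LOADER global exists so other code can reach the active loader after the fact; a sketch of such access (the module name train_net is an assumption):

import train_net
train_net.build_train_dataloader(cfg)
loader = train_net.DATA_LOADER  # the same object the training loop iterates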
Example #9
            dataset_dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
            dataset = DatasetFromList(dataset_dicts, copy=False)
            dataset = MapDataset(dataset, DatasetMapper(cfg, True))

            sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
            logger = logging.getLogger(__name__)
            logger.info("Using training sampler {}".format(sampler_name))
            # TODO avoid if-else?
            if sampler_name == "TrainingSampler":
                sampler = TrainingSampler(len(dataset))
            else:
                raise ValueError("Unknown training sampler: {}".format(sampler_name))
            data_loader = build_batch_data_loader(
                dataset,
                sampler,
                cfg.SOLVER.IMS_PER_BATCH,
                aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
            )

        # logger.info("Starting training from iteration {}".format(start_iter))
        # with EventStorage(start_iter) as storage:
        #     for data, iteration in zip(data_loader, range(start_iter, max_iter)):
        #         iteration = iteration + 1
        #         storage.step()
        #
        #         loss_dict = model(data)
        #         losses = sum(loss_dict.values())
        #         assert torch.isfinite(losses).all(), loss_dict
        #
        #         loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}