def test_build_batch_dataloader_inference(self):
    # Test that build_batch_data_loader can be used for inference
    N = 96
    ds = DatasetFromList(list(range(N)))
    sampler = InferenceSampler(len(ds))
    dl = build_batch_data_loader(ds, sampler, 8, num_workers=3)
    self._check_is_range(dl, N)
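The `_check_is_range` helper the test calls is not shown in this snippet. A minimal sketch of what it could look like, assuming each batch is a plain list of the integers stored by `DatasetFromList(list(range(N)))`:

def _check_is_range(self, data_loader, N):
    # Hypothetical helper -- not part of the original snippet. Collect every
    # item the loader yields and check that indices 0..N-1 each appear once.
    items = []
    for batch in data_loader:
        items.extend(batch)
    self.assertEqual(sorted(items), list(range(N)))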
def build_simple_dataloader(dataset_name: list, batch_size):
    dataset_dicts = get_detection_dataset_dicts(dataset_name)
    dataset = DatasetFromList(dataset_dicts, copy=False)

    cfg = get_cfg()
    cfg["aug_kwargs"] = {}
    dataset = MapDataset(dataset, AlbumentationsMapper(cfg, False))

    # set shuffle to False in debugging mode
    sampler = TrainingSampler(len(dataset), shuffle=False, seed=42)
    dataloader = build_batch_data_loader(
        dataset=dataset, sampler=sampler, total_batch_size=batch_size
    )
    return dataloader
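A usage sketch for the builder above; the dataset name "coco_2017_val" is an assumption and stands in for any registered dataset. Because `TrainingSampler` is an infinite stream, iteration must be bounded explicitly:

import itertools

# Assumes "coco_2017_val" is registered in the DatasetCatalog.
dataloader = build_simple_dataloader(["coco_2017_val"], batch_size=4)
for batch in itertools.islice(dataloader, 2):  # take just 2 batches
    print(len(batch))  # each batch is a list of 4 mapped records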
def build_hand_train_loader(cfg):
    dataset_dicts, num_per_epoch = load_hand(
        cfg.HAND_PROJECT.DATA.MODE,
        cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN,
        cfg.HAND_PROJECT.DATA.BASE_PATH,
        selects=cfg.HAND_PROJECT.DATA.SELECTS,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # NOTE: sampler_name is only logged; a TrainingSampler is always built.
    sampler = TrainingSampler(len(dataset))

    return (
        build_batch_data_loader(
            dataset,
            sampler,
            cfg.SOLVER.IMS_PER_BATCH,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
        ),
        num_per_epoch,
    )
def build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42, shuffle=True):
    dataset_dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)

    logger = logging.getLogger(__name__)
    # Log the actual shuffle setting rather than a hard-coded value.
    logger.info("Using training sampler TrainingSampler with shuffle={}".format(shuffle))
    sampler = TrainingSampler(len(dataset), shuffle=shuffle, seed=seed)

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
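The seeded `TrainingSampler` is what makes this loader reproducible. A small sketch of that behavior in isolation: the sampler is an infinite stream, and with shuffle=False it simply cycles through the indices in order:

import itertools
from detectron2.data.samplers import TrainingSampler

sampler = TrainingSampler(5, shuffle=False, seed=42)
# The infinite index stream cycles 0..4 deterministically.
print(list(itertools.islice(iter(sampler), 7)))  # [0, 1, 2, 3, 4, 0, 1]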
def build_weighted_detection_train_loader(cfg: CfgNode, mapper=None):
    dataset_repeat_factors = get_train_datasets_repeat_factors(cfg)
    # OrderedDict to guarantee order of values() consistent with repeat factors
    dataset_name_to_dicts = OrderedDict(
        {
            name: get_detection_dataset_dicts(
                [name],
                filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
                min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                if cfg.MODEL.KEYPOINT_ON
                else 0,
                proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
                if cfg.MODEL.LOAD_PROPOSALS
                else None,
            )
            for name in cfg.DATASETS.TRAIN
        }
    )
    # Repeat factor for every sample in the dataset
    repeat_factors = [
        [dataset_repeat_factors[dsname]] * len(dataset_name_to_dicts[dsname])
        for dsname in cfg.DATASETS.TRAIN
    ]
    repeat_factors = list(itertools.chain.from_iterable(repeat_factors))

    dataset_dicts = dataset_name_to_dicts.values()
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    logger.info(
        "Using WeightedTrainingSampler with repeat_factors={}".format(
            cfg.DATASETS.TRAIN_REPEAT_FACTOR
        )
    )
    sampler = RepeatFactorTrainingSampler(torch.tensor(repeat_factors))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
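A worked sketch of the per-sample expansion above, with made-up numbers: two datasets with repeat factors 1.0 and 2.5 and 3 and 2 images respectively expand into one factor per image:

import itertools

dataset_repeat_factors = {"ds_a": 1.0, "ds_b": 2.5}  # hypothetical factors
sizes = {"ds_a": 3, "ds_b": 2}                       # hypothetical sizes
repeat_factors = list(
    itertools.chain.from_iterable(
        [dataset_repeat_factors[name]] * sizes[name] for name in ("ds_a", "ds_b")
    )
)
print(repeat_factors)  # [1.0, 1.0, 1.0, 2.5, 2.5] -- one entry per sample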
def build_x_train_loader(
    dataset,
    *,
    mapper,
    sampler=None,
    total_batch_size,
    aspect_ratio_grouping=True,
    num_workers=0,
):
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if sampler is None:
        sampler = TrainingSampler(len(dataset))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)

    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
    )
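A usage sketch for the generic builder; the toy dicts and identity mapper are illustrative only. `aspect_ratio_grouping` is disabled because the toy records have no `width`/`height` fields for detectron2 to group on:

toy_dataset = [{"image_id": i} for i in range(16)]  # stand-in records
loader = build_x_train_loader(
    toy_dataset,
    mapper=lambda d: d,  # identity mapper, for illustration only
    total_batch_size=4,
    aspect_ratio_grouping=False,
)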
def build_train_loader(cfg, mapper=None):
    if mapper is None:
        mapper = get_dataset_mapper(cfg.DATASETS.TRAIN[0])(cfg, True)
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    elif sampler_name == "RatioFactorTrainingSampler":
        repeat_factors = repeat_factors_from_ratios(dataset_dicts)
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
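`repeat_factors_from_ratios` is referenced but not defined in this snippet. A hypothetical sketch, modeled loosely on detectron2's category-frequency variant, under the assumption that each dataset dict carries a precomputed per-image ratio:

import torch

def repeat_factors_from_ratios(dataset_dicts):
    # Hypothetical implementation -- the real helper is not shown. Returns one
    # float repeat factor per image, as RepeatFactorTrainingSampler expects.
    return torch.tensor(
        [float(d.get("ratio", 1.0)) for d in dataset_dicts], dtype=torch.float32
    )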
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open Images Dataset
        # NOTE: the string literals below are always truthy; they only serve
        # as section labels for the steps of the pipeline.
        if 'get_detection_dataset_dicts':
            all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")
        if 'rebalancing':
            image_id_vs_idx = {}
            for idx, desc in enumerate(all_descs_train):
                image_id_vs_idx[desc['image_id']] = idx
            descs_train = [
                all_descs_train[image_id_vs_idx[img_id]] for img_id in sample_image_ids()
            ]
            print('_' * 50 + f'train dataset len: {len(descs_train)}')
            ds_train = DatasetFromList(descs_train, copy=False)
        if 'DatasetMapper':
            augs = [
                RandomContrast(0.8, 1.2),
                RandomBrightness(0.8, 1.2),
                RandomSaturation(0.8, 1.2),
            ]
            augs.extend(build_augmentation(cfg, is_train=True))
            mapper = make_mapper('oid_train', is_train=True, augmentations=T.AugmentationList(augs))

    ds_train = MapDataset(ds_train, mapper)
    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
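`sample_image_ids` and `make_mapper` are project helpers that are not shown here. A hypothetical stand-in for the former; the real one takes no arguments, so it presumably reads its id pool from module state, while this sketch makes the pool explicit:

import random

def sample_image_ids(pool, k, seed=0):
    # Hypothetical stand-in -- draw k distinct image_ids, deterministic per seed.
    return random.Random(seed).sample(pool, min(k, len(pool)))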
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, DatasetMapper(cfg, True))

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    data_loader = build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

    # logger.info("Starting training from iteration {}".format(start_iter))
    # with EventStorage(start_iter) as storage:
    #     for data, iteration in zip(data_loader, range(start_iter, max_iter)):
    #         iteration = iteration + 1
    #         storage.step()
    #
    #         loss_dict = model(data)
    #         losses = sum(loss_dict.values())
    #         assert torch.isfinite(losses).all(), loss_dict
    #
    #         loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
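The commented-out lines above sketch how the loader feeds a training loop. A minimal runnable version, assuming a detectron2-style model that returns a dict of scalar loss tensors in training mode (the optimizer step is added here; the original comment elides it):

import torch
from detectron2.utils.events import EventStorage

def run_training(model, optimizer, data_loader, start_iter, max_iter):
    with EventStorage(start_iter) as storage:
        for data, iteration in zip(data_loader, range(start_iter, max_iter)):
            storage.step()
            loss_dict = model(data)
            losses = sum(loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()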