def build_dataloaders(
    self, pin_memory: bool, current_train_phase_idx=0
) -> Dict[str, torch.utils.data.DataLoader]:
    """
    Build PyTorch dataloaders for all the available_splits. By default, we construct
    the standard PyTorch Dataloader and allow setting all dataloader options.

    Returns a dict mapping the lowercased split name to its dataloader.
    """
    # Give the sampler the same seed for the entire distributed group,
    # as per the PyTorch documentation.
    sampler_seed = self.config["SEED_VALUE"]
    loaders = {
        split.lower(): build_dataloader(
            dataset=self.datasets[split.lower()],
            dataset_config=self.config["DATA"][split],
            num_dataloader_workers=self.config.DATA.NUM_DATALOADER_WORKERS,
            pin_memory=pin_memory,
            multi_processing_method=self.config.MULTI_PROCESSING_METHOD,
            device=self.device,
            sampler_seed=sampler_seed,
            split=split.lower(),
        )
        for split in self.available_splits
    }
    return loaders
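# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the excerpt above. It assumes a
# hypothetical `task` object exposing `config`, `datasets`, `available_splits`,
# and `device` the way build_dataloaders expects, with splits such as TRAIN
# defined under the DATA section of the config.
# ---------------------------------------------------------------------------
def example_build_and_peek(task):
    # Build one dataloader per available split (keys are lowercased split names).
    task.dataloaders = task.build_dataloaders(
        pin_memory=task.config.DATA.PIN_MEMORY
    )
    # Pull a single batch from the train split to verify the pipeline works.
    train_iterator = iter(task.dataloaders["train"])
    first_batch = next(train_iterator)
    return first_batch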
def benchmark_data(cfg: AttrDict, split: str = "train"): split = split.upper() total_images = MAX_ITERS * cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"] timer = Timer() dataset = build_dataset(cfg=cfg, split=split) try: device = torch.device("cuda" if cfg.MACHINE.DEVICE == "gpu" else "cpu") except AttributeError: device = torch.device("cuda") dataloader = build_dataloader( dataset=dataset, dataset_config=cfg["DATA"][split], num_dataloader_workers=cfg.DATA.NUM_DATALOADER_WORKERS, pin_memory=False, multi_processing_method=cfg.MULTI_PROCESSING_METHOD, device=device, sampler_seed=cfg.SEED_VALUE, split=split, ) # Fairstore data sampler would require setting the start iter before it can start. if hasattr(dataloader.sampler, "set_start_iter"): dataloader.sampler.set_start_iter(0) # initial warmup measured as warmup time timer.reset() data_iterator = iter(dataloader) for i in range(10): # warmup next(data_iterator) if i == 0: # the total number of seconds since the start/reset of the timer warmup_time = timer.seconds() logging.info(f"Warmup time {WARMUP_ITERS} batches: {warmup_time} seconds") # measure the number of images per sec in 1000 iterations. timer = Timer() for _ in tqdm.trange(MAX_ITERS): next(data_iterator) time_elapsed = timer.seconds() logging.info( f"iters: {MAX_ITERS}; images: {total_images}; time: {time_elapsed} seconds; " f"images/sec: {round(float(total_images / time_elapsed), 4)}; " f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} ") # run benchmark for a few more rounds to catch fluctuations for round_idx in range(BENCHMARK_ROUNDS): timer = Timer() for _ in tqdm.trange(MAX_ITERS): next(data_iterator) time_elapsed = timer.seconds() logging.info( f"round: {round_idx}: iters: {MAX_ITERS}; images: {total_images}; " f"time: {time_elapsed} seconds; " f"images/sec: {round(float(total_images / time_elapsed), 4)}; " f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} ") del data_iterator del dataloader
def recreate_data_iterator(
    self,
    phase_type: str,
    epoch: int,
    compute_start_iter: bool,
    train_phase_idx: int,
):
    """
    Recreate the data iterator (including multiprocessing workers) and destroy
    the previous iterator. This is called when we load a new checkpoint or when
    the phase changes during training (one epoch to the next). The DataSampler
    may need to be informed of those events so it can update the epoch and
    start_iteration and keep the shuffling deterministic, so we call it here.
    """
    start_iter = 0
    if compute_start_iter:
        start_iter = self._compute_start_iter_from_checkpoint(phase_type)

    self.set_epoch(phase_type, epoch, start_iter, train_phase_idx)

    # Give the sampler the same seed for the entire distributed group,
    # as per the PyTorch documentation.
    sampler_seed = self.config["SEED_VALUE"]
    dataset = self.datasets[phase_type]

    # For OSS, rebuild_dataloader() always returns False.
    # Otherwise, we rebuild the dataloader after every phase.
    if dataset.rebuild_dataloader():
        dataloader = build_dataloader(
            dataset=dataset,
            dataset_config=self.config.DATA[phase_type.upper()],
            num_dataloader_workers=self.config.DATA.NUM_DATALOADER_WORKERS,
            pin_memory=self.config.DATA.PIN_MEMORY,
            multi_processing_method=self.config.MULTI_PROCESSING_METHOD,
            device=self.device,
            sampler_seed=sampler_seed,
            split=phase_type,
        )

        # Delete the old dataloader and replace it.
        del self.dataloaders[phase_type]
        gc.collect()
        self.dataloaders[phase_type] = dataloader

    # Delete the old data iterator and recreate it.
    del self.data_iterator
    gc.collect()
    self.data_iterator = iter(self.dataloaders[phase_type])
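# ---------------------------------------------------------------------------
# Illustrative phase-boundary sketch, not part of the excerpt above. It assumes
# a hypothetical `task` object exposing recreate_data_iterator as defined
# above; the function name and its arguments are placeholders for whatever the
# surrounding training loop tracks.
# ---------------------------------------------------------------------------
def example_advance_to_next_phase(task, phase_type, epoch, train_phase_idx):
    # At a phase boundary we recreate the iterator so the sampler reshuffles
    # deterministically for the new epoch. compute_start_iter is only needed
    # when resuming from a checkpoint mid-epoch, so it is False here.
    task.recreate_data_iterator(
        phase_type=phase_type,
        epoch=epoch,
        compute_start_iter=False,
        train_phase_idx=train_phase_idx,
    )
    return task.data_iterator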