def test_pytorch_iterator_not_fill_last_batch_pad_last_batch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    num_gpus = 1
    batch_size = 100
    pipes, data_size = create_pipeline(
        lambda gpu: COCOReaderPipeline(batch_size=batch_size, num_threads=4, shard_id=gpu,
                                       num_gpus=num_gpus, data_paths=data_sets[0],
                                       random_shuffle=False, stick_to_shard=False,
                                       shuffle_after_epoch=False, pad_last_batch=True),
        batch_size, num_gpus)
    dali_train_iter = PyTorchIterator(pipes, output_map=["data"],
                                      size=pipes[0].epoch_size("Reader"),
                                      fill_last_batch=False, last_batch_padded=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(img_ids_list) == data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) != 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    # No mirroring here: with fill_last_batch=False the padded samples are cut
    # off by the iterator, so mirrored_data holds real, varying data.
    assert len(next_img_ids_list) == data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) != 1
def test_pytorch_iterator_last_batch_pad_last_batch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    num_gpus = 1
    batch_size = 100
    pipes, data_size = create_pipeline(
        lambda gpu: COCOReaderPipeline(batch_size=batch_size, num_threads=4, shard_id=gpu,
                                       num_gpus=num_gpus, data_paths=data_sets[0],
                                       random_shuffle=True, stick_to_shard=False,
                                       shuffle_after_epoch=False, pad_last_batch=True),
        batch_size, num_gpus)
    dali_train_iter = PyTorchIterator(pipes, output_map=["data"],
                                      size=pipes[0].epoch_size("Reader"),
                                      fill_last_batch=True)

    img_ids_list, img_ids_list_set, mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    # With fill_last_batch=True the last batch is filled by repeating the last
    # sample, so more samples than data_size come back and the padded tail
    # collapses to a single value.
    assert len(img_ids_list) > data_size
    assert len(img_ids_list_set) == data_size
    assert len(set(mirrored_data)) == 1

    dali_train_iter.reset()
    next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = \
        gather_ids(dali_train_iter, lambda x: x["data"].squeeze().numpy(), lambda x: 0, data_size)

    assert len(next_img_ids_list) > data_size
    assert len(next_img_ids_list_set) == data_size
    assert len(set(next_mirrored_data)) == 1
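# A minimal arithmetic sketch (not part of the test suite) of why the two tests
# above assert what they do: with pad_last_batch=True the reader repeats the
# last sample to fill the final batch, and with fill_last_batch=True the
# iterator returns those repeats, so the total sample count rounds up to a
# multiple of batch_size while the number of unique ids stays at data_size.
# The data_size value below is a hypothetical shard size, for illustration only.
import math

batch_size = 100
data_size = 1312
iterations = math.ceil(data_size / batch_size)  # 14 batches per epoch
returned = iterations * batch_size              # 1400 samples handed out
padding = returned - data_size                  # 88 repeats of the last sample
assert returned > data_size and padding == 88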
from time import time

import numpy as np
from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator

eii = CXRImageIterator(batch_size, 0, 1, train_ids, train_labels, args.image_dir,
                       image_format=args.image_format, encoding=args.label_encoding)
pipe = CXRImagePipeline(batch_size=batch_size, num_threads=2, device_id=0,
                        num_gpus=4, external_data=eii)
# output_map=["data"] matches the data[0]["data"] lookups below
pii = PyTorchIterator(pipe, output_map=["data"], size=eii.size,
                      last_batch_padded=True, fill_last_batch=False)

times = []
for e in range(1):
    for i, data in enumerate(pii):
        if i == 100:
            break
        if i != 0:
            stop = time()
            times.append(stop - start)
        print("epoch: {}, iter {}, real batch size: {}, data shape: {}".format(
            e, i, len(data[0]["data"]), data[0]["data"].shape))
        img = data[0]["data"].squeeze().cpu().numpy()
        print(np.max(img))
        print(np.min(img))
        print(img.shape)
        # restart the timer at the end of every step so the first (warm-up)
        # iteration is excluded; the original snippet read `start` without
        # assigning it, so this placement is assumed
        start = time()
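# CXRImageIterator is defined elsewhere in this project; below is a minimal
# sketch of the usual DALI external-source pattern it presumably follows,
# modelled on DALI's documented ExternalInputIterator example. The class name,
# constructor arguments, and file layout here are assumptions, not the real
# implementation; the essentials are __iter__/__next__ returning per-batch
# lists of numpy arrays, and a `size` attribute for PyTorchIterator.
import numpy as np


class ExternalInputIteratorSketch:
    def __init__(self, batch_size, file_paths, labels):
        self.batch_size = batch_size
        self.file_paths = file_paths
        self.labels = labels
        self.size = len(file_paths)  # read as eii.size by the framework iterator

    def __iter__(self):
        self.i = 0
        return self

    def __next__(self):
        batch, labels = [], []
        for _ in range(self.batch_size):
            # wrap around so the pipeline can always fill a full batch
            idx = self.i % self.size
            with open(self.file_paths[idx], "rb") as f:
                batch.append(np.frombuffer(f.read(), dtype=np.uint8))
            labels.append(np.array([self.labels[idx]], dtype=np.uint8))
            self.i += 1
        return (batch, labels)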
def test_stop_iteration_pytorch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    test_stop_iteration(
        lambda pipe, size, auto_reset: PyTorchIterator(
            pipe, output_map=["data"], size=size, auto_reset=auto_reset),
        "PyTorchIterator")
def test_stop_iteration_pytorch_fail_single():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    fw_iter = lambda pipe, size, auto_reset: PyTorchIterator(
        pipe, output_map=["data"], size=size, auto_reset=auto_reset)
    check_stop_iter_fail_single(fw_iter)
def test_stop_iteration_pytorch():
    from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator
    fw_iter = lambda pipe, size, auto_reset: PyTorchIterator(
        pipe, output_map=["data"], size=size, auto_reset=auto_reset)
    iter_name = "PyTorchIterator"
    for batch_size, epochs, iter_num, auto_reset, infinite in stop_teration_case_generator():
        yield check_stop_iter, fw_iter, iter_name, batch_size, epochs, iter_num, \
            auto_reset, infinite
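# check_stop_iter lives in the shared test utilities; the sketch below (with a
# hypothetical name) only illustrates the behaviour those cases exercise: the
# framework iterator must raise StopIteration once `size` samples have been
# returned, and must be reusable across epochs, either via auto_reset=True or
# an explicit reset() call.
def sketch_stop_iter_check(fw_iter, pipe, size, auto_reset):
    it = fw_iter(pipe, size, auto_reset)
    for _ in range(2):                   # two epochs
        seen = 0
        for batch in it:                 # the loop ends on StopIteration
            seen += len(batch[0]["data"])
        assert seen >= size              # the last batch may be padded
        if not auto_reset:
            it.reset()                   # required before the next epoch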