def image_decoder_slice_pipe(max_batch_size, input_data, device):
    """Assemble a pipeline that feeds external data through ``fn.decoders.image_slice``.

    Args:
        max_batch_size: Batch size handed to the ``Pipeline`` constructor.
        input_data: Source object given to ``fn.external_source`` (placed on CPU).
        device: Device string forwarded to the slicing decoder.

    Returns:
        The configured ``Pipeline``; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    encoded_stream = fn.external_source(source=input_data, cycle=False, device='cpu')
    # Scalar constants passed as the 2nd and 3rd decoder inputs
    # (presumably the slice anchor and extent along axis 0 - confirm).
    slice_anchor = fn.constant(fdata=.1)
    slice_extent = fn.constant(fdata=.4)
    sliced = fn.decoders.image_slice(encoded_stream, slice_anchor, slice_extent,
                                     axes=0, device=device)
    pipeline.set_outputs(sliced)
    return pipeline
def pipe(max_batch_size, input_data, device):
    """Assemble a pipeline that flips external data along randomly chosen axes.

    Three independent ``fn.coin_flip`` outputs drive the ``depthwise``,
    ``horizontal`` and ``vertical`` arguments of ``fn.flip``.

    Args:
        max_batch_size: Batch size handed to the ``Pipeline`` constructor.
        input_data: Source object given to ``fn.external_source``.
        device: Device string used for the external source.

    Returns:
        The configured ``Pipeline``; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    # The coin flips are created before the data source, in this exact order.
    flip_depth = fn.coin_flip()
    flip_horz = fn.coin_flip()
    flip_vert = fn.coin_flip()
    source = fn.external_source(source=input_data, cycle=False, device=device)
    flipped = fn.flip(source, depthwise=flip_depth, horizontal=flip_horz, vertical=flip_vert)
    pipeline.set_outputs(flipped)
    return pipeline
def pipe(max_batch_size, input_data, device):
    """Assemble a pipeline that applies ``fn.bbox_paste`` with random parameters.

    The paste position (x and y) is drawn from ``fn.uniform`` over (0, 1) and
    the ratio from ``fn.uniform`` over (1, 2).

    Args:
        max_batch_size: Batch size handed to the ``Pipeline`` constructor.
        input_data: Source object given to ``fn.external_source``.
        device: Device string used for the external source.

    Returns:
        The configured ``Pipeline``; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    boxes = fn.external_source(source=input_data, cycle=False, device=device)
    pos_x = fn.uniform(range=(0, 1))
    pos_y = fn.uniform(range=(0, 1))
    scale = fn.uniform(range=(1, 2))
    pasted = fn.bbox_paste(boxes, paste_x=pos_x, paste_y=pos_y, ratio=scale)
    pipeline.set_outputs(pasted)
    return pipeline
def check_bad_device(device_id):
    """Assert that building a pipeline with a GPU external source raises.

    A pipeline is created with the given ``device_id`` and a single
    ``fn.external_source`` placed on ``"gpu"``; ``pipe.build`` is expected to
    raise ``RuntimeError``.

    Args:
        device_id: Value forwarded to the ``Pipeline`` constructor.
    """
    sample_shape = [1, 3, 0, 4]

    def get_data():
        # One uninitialised uint8 array per sample; batch_size is module-level.
        return [np.empty(sample_shape, dtype=np.uint8) for _ in range(batch_size)]

    pipeline = Pipeline(batch_size=batch_size, num_threads=3, device_id=device_id)
    gpu_source = fn.external_source(source=get_data, device="gpu")
    pipeline.set_outputs(gpu_source)
    assert_raises(RuntimeError, pipeline.build)
def test_fn_python_function():
    """Check that ``fn.python_function`` adds one to each externally fed batch.

    Two single-sample batches are fed through an external source and a
    ``lambda x: x + 1`` python function; each run must return the matching
    batch incremented by one.
    """
    first_batch = [np.array([1, 2, 3])]
    second_batch = [np.array([2, 3, 4])]

    pipeline = Pipeline(1, 1, 0, exec_pipelined=False, exec_async=False)
    # A pipeline context is required here because python_function is an
    # operator with potential side effects.
    with pipeline:
        source = fn.external_source([first_batch, second_batch])
        incremented = fn.python_function(source, function=lambda x: x + 1)
        pipeline.set_outputs(incremented)
    pipeline.build()

    # Batches come back in the order they were supplied.
    for expected_batch in (first_batch, second_batch):
        result = pipeline.run()[0].at(0)
        assert np.array_equal(result, expected_batch[0] + 1)
def test_external_source_gen_function_partial():
    """Check cycling over a generator function wrapped in ``functools.partial``.

    The generator yields five single-element batches ``[i + base]``; with
    ``cycle=True`` the same five values must be produced for three
    consecutive passes.
    """
    batches_per_epoch = 5
    base_value = 1.5

    def gen(base):
        for i in range(batches_per_epoch):
            yield [make_array([i + base], dtype=datapy.float32)]

    pipeline = Pipeline(1, 3, 0)
    bound_gen = functools.partial(gen, base_value)
    pipeline.set_outputs(fn.external_source(bound_gen, cycle=True))
    pipeline.build()

    # Three epochs of five batches each, flattened into one counter.
    for step in range(3 * batches_per_epoch):
        index_in_epoch = step % batches_per_epoch
        expected = [np.array([index_in_epoch + 1.5], dtype=np.float32)]
        check_output(pipeline.run(), expected)
def check_dim_mismatch(device, test_data_root, names):
    """Assert that running a numpy reader over ``names`` fails with "Inconsistent data".

    Args:
        device: Device string forwarded to ``fn.numpy_reader``.
        test_data_root: Directory passed as ``file_root``.
        names: File list passed as ``files``.
    """
    pipeline = Pipeline(2, 2, 0)
    reader = fn.numpy_reader(device=device, file_root=test_data_root, files=names)
    pipeline.set_outputs(reader)
    pipeline.build()

    caught = None
    try:
        pipeline.run()
    except RuntimeError as exc:
        caught = exc

    # The checks live outside the except block so that a failing assertion
    # is not reported as a nested exception.
    assert caught, "Exception not thrown"
    message = str(caught)
    assert "Inconsistent data" in message, "Unexpected error message: {}".format(caught)
def test_external_source_collection_cycling():
    """Check that a list of batches is replayed in order when ``cycle=True``.

    Two batches of different lengths are supplied; the pipeline is run for
    three full passes over them and every output is compared with its source.
    """
    pipeline = Pipeline(1, 3, 0)
    first = [make_array([1.5, 2.5], dtype=datapy.float32)]
    second = [make_array([-1, 3.5, 4.5], dtype=datapy.float32)]
    batches = [first, second]
    pipeline.set_outputs(fn.external_source(batches, cycle=True))
    pipeline.build()

    # One epoch is one full cycle over the source collection.
    epoch_count = 3
    for _epoch in range(epoch_count):
        for source_batch in batches:
            expected = asnumpy(source_batch)
            check_output(pipeline.run(), expected)
def check_generic_gaussian_blur(batch_size, sigma, window_size, shape, layout, axes,
                                op_type="cpu", in_dtype=np.uint8, out_dtype=types.NO_TYPE):
    """Compare ``fn.gaussian_blur`` against ``gaussian_baseline`` on random data.

    Args:
        batch_size: Pipeline batch size and number of samples per iteration.
        sigma: Forwarded to the operator and to the baseline.
        window_size: Forwarded to the operator and to the baseline.
        shape: Maximum sample shape for ``RandomlyShapedDataIterator``.
        layout: Layout string attached to the external source.
        axes: Forwarded to the baseline.
        op_type: ``"cpu"`` or ``"gpu"``; selects the operator device.
        in_dtype: NumPy type of the generated input.
        out_dtype: DALI type requested from the operator.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    source = RandomlyShapedDataIterator(batch_size, max_shape=shape, dtype=in_dtype)

    # The reference is computed in the input type when no output type is
    # requested or it matches the input; otherwise float32 is used.
    keeps_input_type = out_dtype == types.NO_TYPE or dali_type(in_dtype) == out_dtype
    result_type = in_dtype if keeps_input_type else np.float32

    run_on_gpu = op_type == "gpu"
    with pipeline:
        samples = fn.external_source(source, layout=layout)
        if run_on_gpu:
            samples = samples.gpu()
        blurred = fn.gaussian_blur(samples, device=op_type, sigma=sigma,
                                   window_size=window_size, dtype=out_dtype)
        pipeline.set_outputs(blurred, samples)
    pipeline.build()

    for _ in range(test_iters):
        blurred_out, samples_out = pipeline.run()
        if run_on_gpu:
            blurred_out = blurred_out.as_cpu()
            samples_out = samples_out.as_cpu()
        images = to_batch(samples_out, batch_size)
        skip_axes = count_skip_axes(layout)
        baseline = []
        for img in images:
            baseline.append(gaussian_baseline(img, sigma, window_size, axes, skip_axes,
                                              dtype=result_type))
        # Non-float32 results are allowed an error of 1, float32 results 1e-04.
        max_error = 1e-04 if result_type == np.float32 else 1
        check_batch(blurred_out, baseline, batch_size, max_allowed_error=max_error)
def create_dali_pipeline(batch_size, num_threads, device_id, data_dir, crop, size,
                         shard_id, num_shards, dali_cpu=False, is_training=True):
    """Build the image-classification data pipeline (reader, decode, resize, normalize).

    Args:
        batch_size: Pipeline batch size.
        num_threads: Number of worker threads for the pipeline.
        device_id: GPU id; also offsets the pipeline seed (``12 + device_id``).
        data_dir: Root directory handed to the file reader.
        crop: Side length of the final square crop (and of the training resize).
        size: Resize target used in the validation branch (``mode="not_smaller"``).
        shard_id: Index of the shard this pipeline reads.
        num_shards: Total number of shards the dataset is split into.
        dali_cpu: When true, decoding and resizing run on CPU instead of GPU.
        is_training: Selects random-crop decoding, shuffling and random mirroring.

    Returns:
        The configured ``Pipeline`` with ``(images, labels)`` outputs, both on
        GPU; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=12 + device_id)
    with pipeline:
        # Shard using the function's own arguments rather than a module-level
        # ``args`` object, so the caller controls which shard is read.
        images, labels = fn.file_reader(file_root=data_dir,
                                        shard_id=shard_id,
                                        num_shards=num_shards,
                                        random_shuffle=is_training,
                                        pad_last_batch=True,
                                        name="Reader")
        dali_device = 'cpu' if dali_cpu else 'gpu'
        decoder_device = 'cpu' if dali_cpu else 'mixed'
        # Memory padding is only requested for the mixed decoder.
        device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
        host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
        if is_training:
            images = fn.image_decoder_random_crop(images,
                                                  device=decoder_device,
                                                  output_type=types.RGB,
                                                  device_memory_padding=device_memory_padding,
                                                  host_memory_padding=host_memory_padding,
                                                  random_aspect_ratio=[0.8, 1.25],
                                                  random_area=[0.1, 1.0],
                                                  num_attempts=100)
            images = fn.resize(images,
                               device=dali_device,
                               resize_x=crop,
                               resize_y=crop,
                               interp_type=types.INTERP_TRIANGULAR)
            mirror = fn.random.coin_flip(probability=0.5)
        else:
            images = fn.image_decoder(images,
                                      device=decoder_device,
                                      output_type=types.RGB)
            images = fn.resize(images,
                               device=dali_device,
                               size=size,
                               mode="not_smaller",
                               interp_type=types.INTERP_TRIANGULAR)
            mirror = False

        images = fn.crop_mirror_normalize(images.gpu(),
                                          dtype=types.FLOAT,
                                          output_layout="CHW",
                                          crop=(crop, crop),
                                          mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                          std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
                                          mirror=mirror)
        labels = labels.gpu()
        pipeline.set_outputs(images, labels)
    return pipeline
def tfrecord_pipeline(dspath, batch_size, num_threads, device="cpu", device_id=None,
                      shard_id=0, num_shards=1, reader_name="Reader", seq=True,
                      chroms=False, chroms_vlog=False, target=True, target_vlog=True,
                      label=False, random_shuffle=True):
    """Build a TFRecord-reading pipeline whose outputs are selected by flags.

    Args:
        dspath: Mapping with the keys ``'TFRecord'``, ``'TFRecord_idx'`` and
            ``"chromatin_tracks"`` (an iterable of feature names).
        batch_size: Pipeline batch size.
        num_threads: Number of worker threads for the pipeline.
        device: ``"cpu"`` or ``"gpu"``; with ``"gpu"`` all features are moved to GPU.
        device_id: Forwarded to the ``Pipeline`` constructor.
        shard_id: Forwarded to the reader.
        num_shards: Forwarded to the reader.
        reader_name: Name given to the reader operator.
        seq: Include the reshaped ``seq`` feature in the outputs.
        chroms: Include the concatenated chromatin tracks in the outputs.
        chroms_vlog: Emit ``log(chroms + 1)`` instead of the raw tracks.
        target: Include the ``target`` feature in the outputs.
        target_vlog: Emit ``log(target + 1)`` instead of the raw target.
        label: Include the ``label`` feature in the outputs.
        random_shuffle: Forwarded to the reader.

    Returns:
        The configured ``Pipeline``; outputs appear in the order seq, chroms,
        target, label (each only when enabled). ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)

    features = {
        "seq": tfrec.VarLenFeature(tfrec.float32, -1.0),
        "label": tfrec.FixedLenFeature([], tfrec.int64, -1),
        "target": tfrec.FixedLenFeature([], tfrec.float32, -1.0),
    }
    track_names = dspath["chromatin_tracks"]
    for track in track_names:
        features[track] = tfrec.VarLenFeature(tfrec.float32, -1.0)

    with pipeline:
        inputs = fn.readers.tfrecord(name=reader_name,
                                     path=dspath['TFRecord'],
                                     index_path=dspath['TFRecord_idx'],
                                     features=features,
                                     shard_id=shard_id,
                                     num_shards=num_shards,
                                     random_shuffle=random_shuffle,
                                     read_ahead=True,
                                     prefetch_queue_depth=20,
                                     pad_last_batch=True)
        if device == "gpu":
            inputs['seq'] = inputs['seq'].gpu()
            for track in track_names:
                inputs[track] = inputs[track].gpu()
            inputs['target'] = inputs['target'].gpu()
            inputs['label'] = inputs['label'].gpu()

        # The sequence gets an extra axis and is then reshaped to (4, -1).
        seqdata = fn.expand_dims(inputs['seq'], axes=1, device=device)
        seqdata = fn.reshape(seqdata, shape=(4, -1), device=device)

        # Each track gets a leading axis, then all tracks are joined along it.
        expanded_tracks = [fn.expand_dims(inputs[track], axes=0, device=device)
                           for track in track_names]
        chromsdata = fn.cat(*expanded_tracks, axis=0, device=device)

        outputs = []
        if seq:
            outputs.append(seqdata)
        if chroms:
            outputs.append(log(chromsdata + 1) if chroms_vlog else chromsdata)
        if target:
            outputs.append(log(inputs['target'] + 1) if target_vlog else inputs['target'])
        if label:
            outputs.append(inputs['label'])
        pipeline.set_outputs(*outputs)
    return pipeline
def check_pad_per_sample_shapes_and_alignment(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check ``fn.pad`` with per-sample ``shape`` and ``align`` arguments.

    Random input shapes, requested shapes and alignments are generated inside
    the pipeline; padding is applied with shape only, alignment only, and
    both, and the resulting sample shapes are validated.

    Args:
        device: ``'cpu'`` or ``'gpu'``; with ``'gpu'`` the input is moved to GPU.
        batch_size: Pipeline batch size.
        ndim: Length of the generated shape/alignment vectors.
        num_iter: Number of pipeline runs to validate.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    axes = (0, 1)

    def random_int_vector(low, high):
        # Uniform values from (low, high) with shape (ndim,), cast to INT32.
        drawn = fn.random.uniform(range=(low, high), shape=(ndim, ))
        return fn.cast(drawn, dtype=types.INT32)

    with pipeline:
        in_shape = random_int_vector(10, 20)
        in_data = fn.random.uniform(range=(0., 1.), shape=in_shape)
        if device == 'gpu':
            in_data = in_data.gpu()
        req_shape = random_int_vector(21, 30)
        req_align = random_int_vector(3, 5)

        out_pad_shape = fn.pad(in_data, axes=axes, align=None, shape=req_shape)
        out_pad_align = fn.pad(in_data, axes=axes, align=req_align, shape=None)
        out_pad_both = fn.pad(in_data, axes=axes, align=req_align, shape=req_shape)
        pipeline.set_outputs(in_shape, in_data, req_shape, req_align,
                             out_pad_shape, out_pad_align, out_pad_both)
    pipeline.build()

    for _ in range(num_iter):
        outs = []
        for out in pipeline.run():
            outs.append(out.as_cpu() if isinstance(out, TensorListGPU) else out)

        for i in range(batch_size):
            (shape_in, data_in, shape_req, align_req,
             padded_shape, padded_align, padded_both) = [tl.at(i) for tl in outs]
            assert (shape_in == data_in.shape).all()
            # Pad to explicit shape
            assert (padded_shape.shape >= shape_in).all()
            assert (shape_req == padded_shape.shape).all()
            # Alignment only
            assert (padded_align.shape >= shape_in).all()
            assert is_aligned(padded_align.shape, align_req, axes)
            # Explicit shape + alignment
            assert (padded_both.shape >= shape_in).all()
            assert (shape_req <= padded_both.shape).all()
            assert is_aligned(padded_both.shape, align_req, axes)
def test_mxnet_iterator_empty_array():
    """Check that the MXNet iterator handles zero-volume samples of every type.

    For each NumPy type that ``mx.nd.zeros`` accepts, a data/label output pair
    with sample shape ``[1, 3, 0, 4]`` is produced and the iterator's data
    arrays are checked for batch size, sample shape and dtype.
    """
    from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator
    import mxnet as mx

    batch_size = 4
    size = 5

    candidate_types = [np.bool_, np.int_, np.intc, np.intp, np.int8, np.int16, np.int32,
                       np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float_,
                       np.float32, np.float16, np.short, np.long, np.longlong, np.ushort,
                       np.ulonglong]

    def mxnet_supports(np_type):
        # A type counts as supported when MXNet can allocate an array of it.
        try:
            mx.nd.zeros([2, 2, 2], ctx=None, dtype=np_type)
        except mx.base.MXNetError:
            return False
        return True

    # keep only the types supported by MXNet
    np_types = [t for t in candidate_types if mxnet_supports(t)]

    test_data_shape = [1, 3, 0, 4]

    def get_data():
        # Outputs are laid out as [type_a, type_a, type_b, type_b, ...];
        # both entries of a pair are the very same batch object.
        outputs = []
        for t in np_types:
            batch = [np.empty(test_data_shape, dtype=t)] * batch_size
            outputs.extend((batch, batch))
        return outputs

    pipeline = Pipeline(batch_size=batch_size, num_threads=3, device_id=0)
    outs = fn.external_source(source=get_data, num_outputs=len(np_types) * 2)
    pipeline.set_outputs(*outs)
    pipeline.build()

    # Output map interleaves [(data_0, DATA), (label_0, LABEL), (data_1, DATA), ...]
    out_map = []
    for i in range(len(np_types)):
        out_map.append(('data_{}'.format(i), MXNetIterator.DATA_TAG))
        out_map.append(('label_{}'.format(i), MXNetIterator.LABEL_TAG))

    iterator = MXNetIterator(pipeline, output_map=out_map, size=size, dynamic_shape=True)

    for batch in iterator:
        for d, t in zip(batch[0].data, np_types):
            shape = d.asnumpy().shape
            assert shape[0] == batch_size
            print(shape)
            assert np.array_equal(shape[1:], test_data_shape)
            assert d.asnumpy().dtype == t
def test_move_to_device_middle():
    """Assert that a ``.gpu()`` transfer in a ``device_id=None`` pipeline fails to build.

    The external source output is moved to GPU before ``fn.rotate``;
    ``pipe.build`` is expected to raise ``RuntimeError``.
    """
    sample_shape = [1, 3, 0, 4]

    def get_data():
        # One uninitialised uint8 array per sample; batch_size is module-level.
        return [np.empty(sample_shape, dtype=np.uint8) for _ in range(batch_size)]

    pipeline = Pipeline(batch_size=batch_size, num_threads=3, device_id=None)
    cpu_data = fn.external_source(source=get_data)
    rotated = fn.rotate(cpu_data.gpu(), angle=25)
    pipeline.set_outputs(rotated)
    assert_raises(RuntimeError, pipeline.build)
def test_external_source_generator_cycle_error():
    """Check that cycling over a generator *object* is rejected with ``TypeError``.

    Passing an instantiated generator with ``cycle=False`` must succeed, while
    the same with ``cycle=True`` must raise with the expected message.
    """
    _unused_pipeline = Pipeline(1, 3, 0)

    def gen():
        for i in range(5):
            yield [make_array([i + 1.5], dtype=datapy.float32)]

    # Without cycling a generator object is an acceptable source.
    fn.external_source(gen(), cycle=False)

    expected_message = "Cannot cycle through a generator * pass that function instead as `source`."
    with assert_raises(TypeError, glob=expected_message):
        # Cycling over a generator object is expected to fail.
        fn.external_source(gen(), cycle=True)
def test_external_source_with_sample_info():
    """Check a per-sample external source callback that takes a sample-info argument.

    For ten freshly built pipelines, the callback encodes the attempt,
    iteration and in-batch index into each sample and verifies that
    ``idx_in_epoch`` is consistent with them; ten batches per pipeline are
    compared with the expected values.
    """
    batch_size = 7
    for attempt in range(10):
        pipeline = Pipeline(batch_size, 3, 0)

        def src(si):
            assert(si.idx_in_epoch == batch_size * si.iteration + si.idx_in_batch)
            value = attempt * 100 + si.iteration * 10 + si.idx_in_batch + 1.5
            return make_array([value], dtype=datapy.float32)

        pipeline.set_outputs(fn.external_source(src, batch=False))
        pipeline.build()

        for iteration in range(10):
            expected = []
            for sample_idx in range(batch_size):
                value = attempt * 100 + iteration * 10 + sample_idx + 1.5
                expected.append(np.array([value], dtype=np.float32))
            check_output(pipeline.run(), expected)
def test_python_function_cpu():
    """Run ``fn.python_function`` with a PIL-based resize through ``check_single_input``.

    No pipeline is constructed here: the operator, the callback and the
    executor flags (``exec_async=False``, ``exec_pipelined=False``) are all
    handed to ``check_single_input``.
    """
    from PIL import Image

    def resize(image):
        # Round-trip through PIL; the target size passed to resize is (50, 10).
        return np.array(Image.fromarray(image).resize((50, 10)))

    check_single_input(fn.python_function, function=resize,
                       exec_async=False, exec_pipelined=False)
def run_reduce_with_layout(batch_size, get_batch, reduction, axes, axis_names, batch_fn):
    """Run a reduction selected by ``axes`` and by ``axis_names`` and compare them.

    Args:
        batch_size: Pipeline batch size.
        get_batch: Source for the external input, which is given layout ``"ABC"``.
        reduction: Reduction operator applied to the input.
        axes: Axis indices for the first reduction.
        axis_names: Axis names for the second reduction.
        batch_fn: Forwarded to ``run_and_compare_with_layout`` with the built pipeline.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipeline:
        data = fn.external_source(source=get_batch, layout="ABC")
        by_index = reduction(data, keep_dims=False, axes=axes)
        by_name = reduction(data, keep_dims=False, axis_names=axis_names)
    pipeline.set_outputs(by_index, by_name)
    pipeline.build()
    run_and_compare_with_layout(batch_fn, pipeline)
def get_gaussian_pipe(batch_size, sigma, window_size, op_type):
    """Build a pipeline that reads, decodes and Gaussian-blurs images.

    Args:
        batch_size: Pipeline batch size.
        sigma: Forwarded to ``fn.gaussian_blur``.
        window_size: Forwarded to ``fn.gaussian_blur``.
        op_type: ``"cpu"`` or ``"gpu"``; with ``"gpu"`` the decoded images are
            moved to GPU before blurring.

    Returns:
        The configured ``Pipeline`` with outputs ``(blurred, decoded)``;
        calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipeline:
        # Only the first reader output is used; the second is discarded.
        encoded, _ = fn.file_reader(file_root=images_dir, shard_id=0, num_shards=1)
        images = fn.image_decoder(encoded, device="cpu", output_type=types.RGB)
        if op_type == "gpu":
            images = images.gpu()
        smoothed = fn.gaussian_blur(images, device=op_type, sigma=sigma,
                                    window_size=window_size)
        pipeline.set_outputs(smoothed, images)
    return pipeline
def pipe(max_batch_size, input_data, device, /):
    """Assemble a non-pipelined, synchronous pipeline around ``fn.python_function``.

    The callback is the ``resize`` name from the enclosing scope.

    Args:
        max_batch_size: Batch size handed to the ``Pipeline`` constructor.
        input_data: Source object given to ``fn.external_source``.
        device: Device string used for the external source.

    Returns:
        The configured ``Pipeline``; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0,
                        exec_async=False, exec_pipelined=False)
    with pipeline:
        source = fn.external_source(source=input_data, cycle=False, device=device)
        resized = fn.python_function(source, function=resize, num_outputs=1)
        pipeline.set_outputs(resized)
    return pipeline
def _test_feed_input(device):
    """Feed the output of one pipeline into another via ``feed_input`` and compare.

    Args:
        device: Device string used for the source pipeline and for the
            destination pipeline's external source.
    """
    src_pipe, batch_size = build_src_pipe(device)

    dst_pipe = Pipeline(batch_size, 1, 0, exec_async=False, exec_pipelined=False)
    ext_input = fn.external_source(name="ext", device=device)
    dst_pipe.set_outputs(ext_input)
    dst_pipe.build()

    for _ in range(3):
        produced = src_pipe.run()
        dst_pipe.feed_input("ext", produced[0])
        echoed = dst_pipe.run()
        # The destination must return exactly what it was fed.
        check_batch(echoed[0], produced[0], batch_size, 0, 0, "XY")
def pipe(max_batch_size, input_data, device):
    """Assemble a spectrogram -> mel filter bank -> decibels -> MFCC pipeline.

    Args:
        max_batch_size: Batch size handed to the ``Pipeline`` constructor.
        input_data: Source object given to ``fn.external_source``.
        device: Device string used for the external source.

    Returns:
        The configured ``Pipeline``; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    signal = fn.external_source(source=input_data, cycle=False, device=device)
    spectrogram = fn.spectrogram(signal, nfft=60, window_length=50, window_step=25)
    # The operators are nested innermost-first, i.e. mel bank, decibels, MFCC.
    coefficients = fn.mfcc(fn.to_decibels(fn.mel_filter_bank(spectrogram)))
    pipeline.set_outputs(coefficients)
    return pipeline
def test_external_source_with_iter():
    """Check an external source callback that takes a single iteration argument.

    For ten freshly built pipelines, the callback returns a one-sample batch
    encoding the attempt and its argument; ten runs per pipeline are compared
    with the expected values.
    """
    for attempt in range(10):
        pipeline = Pipeline(1, 3, 0)

        def make_batch(i):
            # Single-argument callback, same arity as the lambda it replaces.
            return [make_array([attempt * 100 + i * 10 + 1.5], dtype=datapy.float32)]

        pipeline.set_outputs(fn.external_source(make_batch))
        pipeline.build()

        for iteration in range(10):
            expected = [np.array([attempt * 100 + iteration * 10 + 1.5], dtype=np.float32)]
            check_output(pipeline.run(), expected)
def test_combine_transforms_correct_order(batch_size=3, num_threads=4, device_id=0):
    """Check that ``T.combine`` matches chaining the transforms explicitly.

    ``combine(t1, t2)`` must equal rotation applied to a translation input,
    and ``combine(t1, t2, reverse_order=True)`` must equal translation applied
    to a rotation input, within an absolute tolerance of ``1e-6``.

    Args:
        batch_size: Pipeline batch size and number of samples compared.
        num_threads: Number of worker threads for the pipeline.
        device_id: Forwarded to the ``Pipeline`` constructor.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        import nvidia.dali.fn.transforms as T
        t1 = T.translation(offset=(1, 2))
        t2 = T.rotation(angle=30.0)
        # Reference results built by feeding one transform into the other.
        t12 = T.rotation(T.translation(offset=(1, 2)), angle=30.0)
        t21 = T.translation(T.rotation(angle=30.0), offset=(1, 2))
        pipe.set_outputs(T.combine(t1, t2), t12,
                         T.combine(t1, t2, reverse_order=True), t21)
    pipe.build()
    outs = pipe.run()
    for idx in range(batch_size):
        assert np.allclose(outs[0].at(idx), outs[1].at(idx), atol=1e-6)
        assert np.allclose(outs[2].at(idx), outs[3].at(idx), atol=1e-6)
def test_operator_coco_reader_same_images():
    """Check that the COCO reader yields the same samples in every mask mode.

    For each (image directory, annotation file) pair, three readers are
    created - default, ``polygon_masks=True`` and ``pixelwise_masks=True`` -
    and their images, boxes and labels are compared over one full epoch.
    """
    coco_dir = os.path.join(test_data_root, 'db', 'coco')
    coco_dir_imgs = os.path.join(coco_dir, 'images')
    coco_pixelwise_dir = os.path.join(test_data_root, 'db', 'coco_pixelwise')
    coco_pixelwise_dir_imgs = os.path.join(coco_pixelwise_dir, 'images')

    datasets = [
        (coco_dir_imgs, os.path.join(coco_dir, 'instances.json')),
        (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances.json')),
        (coco_pixelwise_dir_imgs,
         os.path.join(coco_pixelwise_dir, 'instances_rle_counts.json')),
    ]

    for file_root, annotations_file in datasets:
        pipe = Pipeline(batch_size=1, num_threads=4, device_id=0)
        with pipe:
            # Every reader uses the annotation file paired with this image
            # directory, so each dataset variant is actually exercised.
            inputs1, boxes1, labels1, *other = fn.coco_reader(
                file_root=file_root,
                annotations_file=annotations_file,
                name="reader1",
                seed=1234)
            inputs2, boxes2, labels2, *other = fn.coco_reader(
                file_root=file_root,
                annotations_file=annotations_file,
                polygon_masks=True,
                name="reader2")
            inputs3, boxes3, labels3, *other = fn.coco_reader(
                file_root=file_root,
                annotations_file=annotations_file,
                pixelwise_masks=True,
                name="reader3")
            pipe.set_outputs(inputs1, boxes1, labels1,
                             inputs2, boxes2, labels2,
                             inputs3, boxes3, labels3)
        pipe.build()

        epoch_sz = pipe.epoch_size("reader1")
        assert epoch_sz == pipe.epoch_size("reader2")
        assert epoch_sz == pipe.epoch_size("reader3")

        for _ in range(epoch_sz):
            (inputs1, boxes1, labels1,
             inputs2, boxes2, labels2,
             inputs3, boxes3, labels3) = pipe.run()
            np.testing.assert_array_equal(inputs1.at(0), inputs2.at(0))
            np.testing.assert_array_equal(inputs1.at(0), inputs3.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels2.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels3.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes2.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes3.at(0))
def test_seed_serialize():
    """Check that a seeded pipeline gives identical output after each deserialization.

    A pipeline created with ``seed=12`` is serialized once; it is then
    deserialized, built and run fifty times, and sample 30 of the first
    output must match the one from the first run exactly.
    """
    batch_size = 64

    class HybridPipe(Pipeline):
        """Caffe reader -> mixed JPEG decoder -> GPU crop/mirror/normalize."""

        def __init__(self, batch_size, num_threads, device_id):
            super().__init__(batch_size, num_threads, device_id, seed=12)
            self.input = ops.CaffeReader(path=caffe_db_folder, random_shuffle=True)
            self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
            self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                                output_dtype=types.FLOAT,
                                                crop=(224, 224),
                                                image_type=types.RGB,
                                                mean=[128., 128., 128.],
                                                std=[1., 1., 1.])
            self.coin = ops.CoinFlip()
            self.uniform = ops.Uniform(range=(0.0, 1.0))
            self.iter = 0

        def define_graph(self):
            self.jpegs, self.labels = self.input()
            decoded = self.decode(self.jpegs)
            do_mirror = self.coin()
            normalized = self.cmnp(decoded, mirror=do_mirror,
                                   crop_pos_x=self.uniform(),
                                   crop_pos_y=self.uniform())
            return (normalized, self.labels)

        def iter_setup(self):
            pass

    sample_index = 30
    original = HybridPipe(batch_size=batch_size, num_threads=2, device_id=0)
    serialized = original.serialize()

    reference = None
    for _ in range(50):
        restored = Pipeline()
        restored.deserialize_and_build(serialized)
        first_output = restored.run()[0].asCPU()
        sample = first_output.at(sample_index)
        if reference is None:
            # The first run provides the reference every later run must match.
            reference = sample
        assert (np.sum(np.abs(reference - sample)) == 0)
def get_random_pipeline(device, batch_size):
    """Build a pipeline that applies ``fn.grid_mask`` with random parameters.

    Args:
        device: ``'cpu'`` or ``'gpu'``; with ``'gpu'`` the decoded images are
            moved to GPU and the operator runs there.
        batch_size: Pipeline batch size.

    Returns:
        The configured ``Pipeline`` with outputs
        ``(masked, decoded, tile, ratio, angle)``; ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size, 4, 0)
    with pipeline:
        # Only the first reader output is used; the second is discarded.
        encoded, _ = fn.readers.file(file_root=img_dir)
        images = fn.decoders.image(encoded, device='cpu', output_type=types.RGB)
        if device == 'gpu':
            images = images.gpu()
        tile = fn.cast(fn.uniform(range=(50, 200)), dtype=types.INT32)
        ratio = fn.uniform(range=(0.3, 0.7))
        angle = fn.uniform(range=(-math.pi, math.pi))
        masked = fn.grid_mask(images, device=device, tile=tile, ratio=ratio, angle=angle)
        pipeline.set_outputs(masked, images, tile, ratio, angle)
    return pipeline
def pipe(max_batch_size, input_data, device):
    """Assemble a pipeline exercising a chain of arithmetic operators.

    The input is clamped to [0.1, 0.9] and doubled; the doubled value then
    goes through ``+ 3``, ``- 4``, ``/ 5``, ``// 6`` and negation, and is
    finally added back to the doubled value.

    Args:
        max_batch_size: Batch size handed to the ``Pipeline`` constructor.
        input_data: Source object given to ``fn.external_source``.
        device: Device string used for the external source.

    Returns:
        The configured ``Pipeline``; calling ``build()`` is left to the caller.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    source = fn.external_source(source=input_data, cycle=False, device=device)
    clamped = dali.math.clamp(source, 0.1, 0.9)
    doubled = clamped * 2
    shifted = (doubled + 3) - 4
    divided = (shifted / 5) // 6
    result = -divided + doubled
    pipeline.set_outputs(result)
    return pipeline
def test_python_function_cpu():
    """Run ``fn.python_function`` with a PIL-based resize in a ``device_id=None`` pipeline.

    The pipeline is synchronous and non-pipelined; it is built and run three
    times without inspecting the outputs.
    """
    def resize(image):
        # Round-trip through PIL; the target size passed to resize is (50, 10).
        pil_image = Image.fromarray(image)
        return np.array(pil_image.resize((50, 10)))

    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=None,
                        exec_async=False, exec_pipelined=False)
    with pipeline:
        source = fn.external_source(source=get_data, layout="HWC")
        resized = fn.python_function(source, function=resize)
        pipeline.set_outputs(resized)
    pipeline.build()

    run_count = 3
    for _ in range(run_count):
        pipeline.run()
def check_transform_translation_op(offset, has_input = False, reverse_order=False,
                                   batch_size=1, num_threads=4, device_id=0):
    """Compare ``fn.transforms.translation`` with ``translate_affine_mat``.

    Args:
        offset: Translation offset; its length sets the dimensionality.
        has_input: When true, a random ``(ndim, ndim + 1)`` matrix is fed to
            the operator as input and also returned as a second output.
        reverse_order: Forwarded to the operator (only when ``has_input`` is
            true) and to ``check_results``.
        batch_size: Pipeline batch size.
        num_threads: Number of worker threads for the pipeline.
        device_id: Forwarded to the ``Pipeline`` constructor.
    """
    ndim = len(offset)
    pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipeline:
        if not has_input:
            translation = fn.transforms.translation(device='cpu', offset=offset)
            pipeline.set_outputs(translation)
        else:
            base = fn.uniform(range=(-1, 1), shape=(ndim, ndim+1), seed = 1234)
            translation = fn.transforms.translation(base, device='cpu', offset=offset,
                                                    reverse_order=reverse_order)
            pipeline.set_outputs(translation, base)
    pipeline.build()

    outs = pipeline.run()
    reference = translate_affine_mat(offset=offset)
    # The input matrix is only available as a second output when one was fed.
    input_mat = None
    if has_input:
        input_mat = outs[1]
    check_results(outs[0], batch_size, reference, input_mat, reverse_order)