def make_pipe():
    """Build a slice graph whose axes/anchor/extent are dynamic or randomly generated.

    Reads ``image_gen``, ``device``, ``args_device`` and ``get_dynamic_axes``
    from the enclosing scope.  When the slice arguments live on the GPU only
    the positional form of ``fn.slice`` is built; otherwise both the named and
    the positional forms are built and returned.
    """
    img = fn.external_source(source=image_gen)
    if device == "gpu":
        img = img.gpu()

    if not get_dynamic_axes:
        # No external generator: constant axes and random relative anchor/extent.
        uniform_kwargs = dict(shape=(2, ), dtype=types.FLOAT, device=args_device)
        axes = types.Constant(np.array([0, 1], dtype=np.int32), device="cpu")
        anchor = fn.random.uniform(range=(0.1, 0.2), **uniform_kwargs)
        extent = fn.random.uniform(range=(0.4, 0.6), **uniform_kwargs)
    else:
        axes, anchor, extent = fn.external_source(
            source=get_dynamic_axes, num_outputs=3)

    if args_device == "gpu":
        positional = fn.slice(img, anchor, extent, axes=axes)
        return img, axes, anchor, extent, positional

    by_name = fn.slice(img, rel_start=anchor, rel_shape=extent, axes=axes)
    by_position = fn.slice(img, anchor, extent, axes=axes)
    return img, axes, anchor, extent, by_name, by_position
def check_slice_named_args_default_start_or_end(device, batch_size):
    """Check that omitting ``start`` or ``end`` in ``fn.slice`` matches the explicit form.

    Four slices of the same input are built: explicit ``end=in_shape``,
    explicit ``start=[0, 0]``, ``start`` only and ``end`` only.  Output 0 must
    equal output 2 and output 1 must equal output 3 for every sample.

    Args:
        device: Accepted for signature compatibility.
            NOTE(review): this argument is never used, so the data always
            stays where ``fn.external_source`` puts it - confirm whether a
            GPU variant was intended.
        batch_size: Number of random ``5x4x3`` uint8 samples per batch.
    """
    test_data_shape = [5, 4, 3]

    def get_data():
        return [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        in_shape = np.array([5, 4])
        start = np.array([1, 2])
        shape = np.array([3, 1])
        end = start + shape
        outs = [
            fn.slice(data, start=start, end=in_shape, axes=(0, 1)),
            fn.slice(data, start=[0, 0], end=end, axes=(0, 1)),
            fn.slice(data, start=start, axes=(0, 1)),
            fn.slice(data, end=end, axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)
    pipe.build()
    for _ in range(3):
        outs = pipe.run()
        for sample in range(batch_size):
            # explicit end == full extent  vs.  end omitted
            np.testing.assert_equal(np.array(outs[0][sample]),
                                    np.array(outs[2][sample]))
            # explicit start == origin  vs.  start omitted
            np.testing.assert_equal(np.array(outs[1][sample]),
                                    np.array(outs[3][sample]))
def define_graph(self):
    """Build the COCO graph: read, random bbox crop, decode the crop, resize.

    Returns:
        Tuple ``(images, targets, labels)`` where ``targets`` concatenates the
        cropped boxes with the transformed polygon vertices reshaped to
        ``[-1, 10]``; ``labels`` and ``targets`` are moved to the GPU.
    """
    inputs, bboxes, labels, polygons, vertices = fn.readers.coco(
        file_root=self.file_root,
        annotations_file=self.annotation_file,
        skip_empty=True,
        shard_id=self.share_id,
        num_shards=self.num_gpus,
        ratio=True,
        ltrb=True,
        polygon_masks=True,
        random_shuffle=self.random_shuffle,
        shuffle_after_epoch=self.shuffle_after_epoch,
        name="Reader",
    )

    # First two entries of the peeked image shape (axis 0 sliced at 0, length 2).
    peeked_shape = fn.cast(fn.peek_image_shape(inputs), dtype=types.INT32)
    input_shape = fn.slice(peeked_shape, 0, 2, axes=[0])
    img_h = fn.slice(input_shape, 0, 1, axes=[0], dtype=types.FLOAT)
    img_w = fn.slice(input_shape, 1, 1, axes=[0], dtype=types.FLOAT)

    # Square crop whose side is a random fraction of the shorter image side.
    shorter = math.min(img_w, img_h)
    crop_scale = fn.random.uniform(range=[0.3, 1.])
    crop_side = fn.cast(math.ceil(crop_scale * shorter), dtype=types.INT32)
    crop_shape = fn.cat(crop_side, crop_side)

    anchor_rel, shape_rel, bboxes, labels, bbox_indices = fn.random_bbox_crop(
        bboxes,
        labels,
        input_shape=input_shape,
        crop_shape=crop_shape,
        shape_layout="HW",
        thresholds=[0.],           # no minimum intersection-over-union
        allow_no_crop=False,       # "no crop" outcome is disallowed
        # NOTE(review): the original comment called this a fixed seed; -1 is
        # presumably the "choose a seed automatically" value - confirm.
        seed=-1,
        bbox_layout="xyXY",        # left, top, right, bottom
        output_bbox_indices=True,  # also return indices of the kept boxes
        total_num_attempts=1024,
    )

    # Keep only the polygons that belong to the surviving boxes.
    polygons, vertices = fn.segmentation.select_masks(bbox_indices, polygons, vertices)

    images = fn.decoders.image_slice(
        inputs,
        anchor_rel,
        shape_rel,
        normalized_anchor=False,
        normalized_shape=False,
        device='mixed',
    )
    images = fn.color_space_conversion(images, image_type=types.RGB, output_type=types.BGR)

    # Two chained coordinate transforms applied to the mask vertices.
    MT_1_vertices = fn.transforms.crop(to_start=(0.0, 0.0), to_end=fn.cat(img_w, img_h))
    MT_2_vertices = fn.transforms.crop(
        from_start=anchor_rel,
        from_end=(anchor_rel + shape_rel),
        to_start=(0.0, 0.0),
        to_end=(1., 1.),
    )
    vertices = fn.coord_transform(vertices, MT=MT_1_vertices)
    vertices = fn.coord_transform(vertices, MT=MT_2_vertices)

    targets = fn.cat(bboxes, fn.reshape(vertices, shape=[-1, 10]), axis=1)

    # Pick one interpolation method per sample at random.
    interp_methods = [
        types.INTERP_LINEAR,
        types.INTERP_CUBIC,
        types.INTERP_LANCZOS3,
        types.INTERP_GAUSSIAN,
        types.INTERP_NN,
        types.INTERP_TRIANGULAR,
    ]
    interp_codes = [int(method) for method in interp_methods]
    interp_method = fn.random.uniform(values=interp_codes, dtype=types.INT32)
    interp_method = fn.reinterpret(interp_method, dtype=types.INTERP_TYPE)
    images = fn.resize(images, dtype=types.FLOAT, size=self.input_dim,
                       interp_type=interp_method)

    labels = labels.gpu()
    targets = targets.gpu()
    return (images, targets, labels)
def make_pipe():
    """Slice an image with externally supplied axes/anchor/extent, by name and by position.

    ``image_gen``, ``device`` and ``get_dynamic_axes`` come from the enclosing
    scope.  Both slice variants are returned together with their inputs.
    """
    img = fn.external_source(source=image_gen)
    if device == 'gpu':
        img = img.gpu()

    axes, anchor, extent = fn.external_source(source=get_dynamic_axes, num_outputs=3)

    by_name = fn.slice(img, rel_start=anchor, rel_shape=extent, axes=axes)
    by_position = fn.slice(img, anchor, extent, axes=axes)
    return img, axes, anchor, extent, by_name, by_position
def crop_fn(self, img, lbl):
    """Crop ``img`` and ``lbl`` on the GPU around a randomly picked mask pixel.

    The pixel is drawn from the foreground with probability
    ``self.oversampling``.  The anchor is centred on that pixel and then
    clamped between 0 and ``shape(lbl) - crop_shape``.
    """
    pick_foreground = fn.coin_flip(probability=self.oversampling)
    center = fn.segmentation.random_mask_pixel(lbl, foreground=pick_foreground)

    # Centre the window on the pixel, then clamp it from below and above.
    centred_anchor = self.slice_fn(center, 1, self.dim) - self.crop_shape // 2
    lower_clamped = math.max(0, centred_anchor)
    upper_bound = self.slice_fn(fn.shapes(lbl), 1, self.dim) - self.crop_shape
    crop_anchor = math.min(lower_clamped, upper_bound)

    slice_kwargs = dict(axis_names=self.axis_name, out_of_bounds_policy="pad")
    img = fn.slice(img.gpu(), crop_anchor, self.crop_shape, **slice_kwargs)
    lbl = fn.slice(lbl.gpu(), crop_anchor, self.crop_shape, **slice_kwargs)
    return img, lbl
def make_pipe():
    """Decode Caffe-DB images, cast them, and slice the full extent in two ways.

    ``caffe_db_folder``, ``device`` and ``dtype`` come from the enclosing scope.
    ``sliced1`` takes 3 elements from index 0 along axis 2; ``sliced2`` takes
    the relative range 0..1 along each of the ``"HWC"`` axes.
    """
    encoded, _ = fn.readers.caffe(path=caffe_db_folder, random_shuffle=False)
    decoded = fn.decoders.image(encoded, device="cpu", output_type=types.RGB)
    if device == 'gpu':
        decoded = decoded.gpu()
    image = fn.cast(decoded, dtype=dtype)

    by_absolute = fn.slice(image, 0, 3, axes=(2,))
    by_relative = fn.slice(image, rel_start=(0, 0, 0), rel_end=(1, 1, 1), axis_names="HWC")
    return image, by_absolute, by_relative
def make_pipe():
    """Slice constant data using axes drawn from ``wrong_axes_range``.

    ``device``, ``wrong_axes_range`` and ``named_args`` come from the enclosing
    scope; ``named_args`` selects the keyword or the positional form of
    ``fn.slice``.
    """
    fake_data = fn.constant(idata=0, shape=[10, 10, 3], dtype=types.FLOAT, device=device)
    axes = fn.random.uniform(range=wrong_axes_range, shape=(2,), dtype=types.INT32)
    anchor = fn.random.uniform(range=[0.0, 0.3], shape=(2,), dtype=types.FLOAT)
    extent = fn.random.uniform(range=[0.4, 0.6], shape=(2,), dtype=types.FLOAT)

    if not named_args:
        return fn.slice(fake_data, anchor, extent, axes=axes)
    return fn.slice(fake_data, rel_start=anchor, rel_shape=extent, axes=axes)
def test_slice_fn():
    """Slice a fixed 3x3 matrix on CPU and GPU and compare with the expected 2x1 window."""
    pipe = dali.pipeline.Pipeline(1, 1, 0)

    matrix = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]], dtype=np.float32)
    src = fn.external_source([[matrix]])

    # Anchor (1, 1) with shape (2, 1) selects the values 14 and 17.
    out_cpu = fn.slice(src, np.array([1, 1]), np.array([2, 1]), axes=[0, 1])
    out_gpu = fn.slice(src.gpu(), np.array([1, 1]), np.array([2, 1]), axes=[0, 1])

    pipe.set_outputs(out_cpu, out_gpu)
    pipe.build()
    cpu_result, gpu_result = pipe.run()

    expected = np.array([[14], [17]])
    assert np.array_equal(cpu_result.at(0), expected)
    assert np.array_equal(gpu_result.as_cpu().at(0), expected)
def crop_fn(self, img, lbl):
    """Crop ``img`` and ``lbl`` around a randomly picked mask pixel.

    The pixel comes from the foreground with probability
    ``self.oversampling``; both random operators receive
    ``self.aug_seed_kwargs``.  The anchor is centred on the pixel and clamped
    between 0 and ``shape(lbl) - crop_shape``.
    """
    pick_foreground = fn.coin_flip(probability=self.oversampling, **self.aug_seed_kwargs)
    center = fn.segmentation.random_mask_pixel(
        lbl, foreground=pick_foreground, **self.aug_seed_kwargs)

    centred_anchor = self.slice_fn(center) - self.crop_shape // 2
    lower_clamped = math.max(0, centred_anchor)
    upper_bound = self.slice_fn(fn.shapes(lbl)) - self.crop_shape
    crop_anchor = math.min(lower_clamped, upper_bound)

    slice_kwargs = dict(axis_names="DHW", out_of_bounds_policy="pad")
    img = fn.slice(img, crop_anchor, self.crop_shape, **slice_kwargs)
    lbl = fn.slice(lbl, crop_anchor, self.crop_shape, **slice_kwargs)
    return img, lbl
def dali_reflect_pad_graph(x, x_len, pad_amount):
    """Pad a 1D signal with reversed copies of its own border samples.

    The leading pad is ``x[1 : 1 + pad_amount]`` reversed and the trailing pad
    is ``x[x_len - pad_amount - 1 : x_len - 1]`` reversed; both are
    concatenated around ``x`` along axis 0.
    """

    def _reversed_1d(signal):
        # TODO(janton): remove the layout trick when Flip supports arbitrary data layouts
        as_image = fn.reshape(signal, shape=(-1, 1, 1), layout="HWC")
        flipped = fn.flip(as_image, vertical=1)
        return fn.reshape(flipped, shape=(-1, ), layout="t")

    head = _reversed_1d(fn.slice(x, 1, pad_amount, axes=(0, )))
    tail = _reversed_1d(fn.slice(x, x_len - pad_amount - 1, pad_amount, axes=(0, )))
    return fn.cat(head, x, tail, axis=0)
def slice_pipeline(get_anchor, get_shape):
    """Slice externally fed ``"HWC"`` data with externally fed anchors and shapes.

    Data comes from ``get_data`` (enclosing scope); out-of-bounds regions are
    padded.
    """
    source_data = fn.external_source(source=get_data, layout='HWC')
    anchor_node = fn.external_source(source=get_anchor)
    shape_node = fn.external_source(source=get_shape)
    return fn.slice(source_data, anchor_node, shape_node, out_of_bounds_policy='pad')
def check_slice_named_args_errors(device, batch_size):
    """Check that conflicting ``fn.slice`` arguments raise ``RuntimeError``.

    The slice is given positional anchor/shape inputs together with the
    ``start``, ``end`` and ``shape`` keyword arguments; building and running
    the pipeline is expected to fail.

    Args:
        device: Accepted for signature compatibility.
            NOTE(review): never used in this function - confirm whether a GPU
            variant was intended.
        batch_size: Number of random ``5x4x3`` uint8 samples per batch.
    """
    test_data_shape = [5, 4, 3]

    def get_data():
        return [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        start = np.array([1, 2])
        shape = np.array([3, 1])
        outs = [
            fn.slice(data, start, shape, start=start, end=start + shape, shape=shape,
                     axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)
    with assert_raises(RuntimeError):
        # The error may surface at build time or on the first run.
        pipe.build()
        pipe.run()
def create_decoder_slice_pipeline(data_path, device):
    """Produce the same random crop via the fused decoder-slice and via decode + slice.

    Returns:
        ``(images_sliced_1, images_sliced_2)``: the output of
        ``fn.decoders.image_slice`` and the output of ``fn.decoders.image``
        followed by ``fn.slice``, both using the same anchor and shape.
    """
    jpegs, _ = fn.readers.file(file_root=data_path, shard_id=0, num_shards=1, name="Reader")

    anchor = fn.random.uniform(range=[0.05, 0.15], shape=(2,))
    shape = fn.random.uniform(range=[0.5, 0.7], shape=(2,))

    decoder_kwargs = dict(device=device, hw_decoder_load=0.7, output_type=types.RGB)
    fused = fn.decoders.image_slice(jpegs, anchor, shape, axes=(0, 1), **decoder_kwargs)
    decoded = fn.decoders.image(jpegs, **decoder_kwargs)
    separate = fn.slice(decoded, anchor, shape, axes=(0, 1))
    return fused, separate
def define_graph(self):
    """Build the graph: decode + resize the images and expose labels and crop data.

    ``self.labels`` and ``self.crop_dim`` are (re)assigned as a side effect.

    NOTE(review): the sliced and resized ``jpegs`` branch below is built but is
    not part of the returned outputs, and it slices the reader output rather
    than the decoded ``images`` - confirm the intended behaviour.
    """
    jpegs, _unused_labels = self.input()
    self.labels = self.label()
    self.crop_dim = self.crops()

    # Columns 0..1 of crop_dim form the anchor, columns 2..3 the shape.
    anchor_cols = fn.slice(self.crop_dim, 0, 2, axes=[1])
    anchor = fn.reshape(anchor_cols, shape=[-1])
    shape_cols = fn.slice(self.crop_dim, 2, 2, axes=[1])
    shape = fn.reshape(shape_cols, shape=[-1])
    anchor = self.cast(anchor)
    shape = self.cast(shape)

    images = self.res(self.decode(jpegs))

    # decode and slicing
    jpegs = fn.slice(jpegs, anchor, shape, axes=[0, 1], device='gpu')
    jpegs = self.res(jpegs)

    return (images, self.labels, self.crop_dim)
def jpeg_distortion_pipe(device='cpu', quality=None):
    """Pair "slice of distorted sequence" with "distorted slice" for every frame.

    For each of the ``seq_len`` frames (``seq_len`` comes from the enclosing
    scope) two outputs are appended: frame ``i`` cut out of the distorted
    sequence, and the distortion applied to frame ``i`` cut out of the input.
    When ``quality`` is ``None`` a random INT32 quality in ``[1, 99]`` is used.
    """
    frames_iter = InputImagesIter(seq_len)
    inputs = fn.external_source(source=frames_iter, layout='FHWC', batch=False)
    if device == 'gpu':
        inputs = inputs.gpu()
    if quality is None:
        quality = fn.random.uniform(range=[1, 99], dtype=types.INT32)

    distorted_seq = fn.jpeg_compression_distortion(inputs, quality=quality)

    results = []
    for frame in range(seq_len):
        window = dict(axes=(0, ), start=(frame, ), end=(frame + 1, ))
        # First, slice of the distorted sequence
        results.append(fn.slice(distorted_seq, **window))
        # Second, distorted slice of the input
        input_frame = fn.slice(inputs, **window)
        results.append(fn.jpeg_compression_distortion(input_frame, quality=quality))
    return tuple(results)
def pipe(max_batch_size, input_data, device):
    """Return a pipeline slicing external data along axis 0 with constant anchor/shape.

    The anchor (0.1) and shape (0.5) are CPU constants; the data source is not
    cycled.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    anchor = fn.constant(fdata=.1, device='cpu')
    extent = fn.constant(fdata=.5, device='cpu')
    data = fn.external_source(source=input_data, cycle=False, device=device)
    pipeline.set_outputs(fn.slice(data, anchor, extent, axes=0, device=device))
    return pipeline
def test_pipe():
    """Slice external data along axis 0 using scalar-constant ``start`` and ``shape``.

    ``get_data`` and ``device`` come from the enclosing scope.  Returns the
    (possibly GPU) data, the slice, and the two scalar constants.
    """
    data = fn.external_source(source=get_data)
    extent = types.ScalarConstant(10)
    offset = types.ScalarConstant(5)
    if device != 'cpu':
        data = data.gpu()
    cut = fn.slice(data, start=offset, shape=extent, axes=[0], device=device)
    return data, cut, extent, offset
def biased_crop_fn(self, img, label):
    """Crop ``img`` and ``label`` on the CPU around a randomly chosen object box.

    A bounding box is drawn from ``label`` (foreground with probability
    ``self.oversampling``), a crop anchor is sampled for it, and both inputs
    are sliced with padding before being moved to the GPU.
    """
    roi_start, roi_end = fn.segmentation.random_object_bbox(
        label,
        format="start_end",
        foreground_prob=self.oversampling,
        background=0,
        seed=self.internal_seed,
        device="cpu",
        cache_objects=True,
    )

    full_anchor = fn.roi_random_crop(
        label,
        roi_start=roi_start,
        roi_end=roi_end,
        crop_shape=[1, *self.patch_size],
    )
    # drop channels from anchor
    spatial_anchor = fn.slice(full_anchor, 1, 3, axes=[0])

    img, label = fn.slice(
        [img, label],
        spatial_anchor,
        self.crop_shape,
        axis_names="DHW",
        out_of_bounds_policy="pad",
        device="cpu",
    )
    return img.gpu(), label.gpu()
def check_random_mask_pixel(ndim=2, batch_size=3, min_extent=20, max_extent=50):
    """Exercise ``fn.segmentation.random_mask_pixel`` and use its result as a slice anchor.

    A random integer mask of random shape is generated; pixels are selected
    with several foreground criteria and the picks are validated against the
    mask.  One pick is also turned into a clamped crop anchor and fed to
    ``fn.slice``.
    """
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        # Input mask
        extents = [
            fn.cast(fn.random.uniform(range=(min_extent, max_extent + 1)), dtype=types.INT32)
            for _ in range(ndim)
        ]
        in_shape = fn.stack(*extents)
        in_mask = fn.cast(fn.random.uniform(range=(0, 2), shape=in_shape), dtype=types.INT32)

        # > 0
        fg_pixel1 = fn.segmentation.random_mask_pixel(in_mask, foreground=1)
        # >= 0.99
        fg_pixel2 = fn.segmentation.random_mask_pixel(in_mask, foreground=1, threshold=0.99)
        # == 2
        fg_pixel3 = fn.segmentation.random_mask_pixel(in_mask, foreground=1, value=2)
        rnd_pixel = fn.segmentation.random_mask_pixel(in_mask, foreground=0)
        coin_flip = fn.random.coin_flip(probability=0.7)
        fg_biased = fn.segmentation.random_mask_pixel(in_mask, foreground=coin_flip)

        # Demo purposes: Taking a random pixel and produce a valid anchor to feed slice
        # We want to force the center adjustment, thus the large crop shape
        crop_shape = in_shape - 2
        anchor = fn.cast(fg_pixel1, dtype=types.INT32) - crop_shape // 2
        anchor = math.min(math.max(0, anchor), in_shape - crop_shape)
        out_mask = fn.slice(in_mask, anchor, crop_shape, axes=tuple(range(ndim)))

    pipe.set_outputs(in_mask, fg_pixel1, fg_pixel2, fg_pixel3, rnd_pixel, coin_flip, fg_biased,
                     anchor, crop_shape, out_mask)
    pipe.build()

    for _ in range(3):
        outputs = pipe.run()
        for idx in range(batch_size):
            sample = [out.at(idx) for out in outputs]
            mask, cropped = sample[0], sample[9]
            # Outputs 1..8 are small index/scalar tensors; convert to Python values.
            (pix_gt0, pix_thr, pix_val, _rnd, flip, pix_biased,
             anchor_val, crop_val) = [tensor.tolist() for tensor in sample[1:9]]

            assert mask[tuple(pix_gt0)] > 0
            assert mask[tuple(pix_thr)] > 0.99
            assert mask[tuple(pix_val)] == 2
            assert mask[tuple(pix_biased)] > 0 or not flip

            # The crop window must lie fully inside the mask.
            for d in range(ndim):
                assert 0 <= anchor_val[d] and anchor_val[d] + crop_val[d] <= mask.shape[d]
            assert cropped.shape == tuple(crop_val)
def check_pad_to_square(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check that ``fn.pad`` can pad random 2D data to a square of its longer side.

    Args:
        device: ``'gpu'`` moves the data to the GPU before padding.
        batch_size: Samples per batch.
        ndim: Number of extents drawn for the random input shape.
        num_iter: Number of pipeline runs to verify.

    The padded output must have all sides equal to the longest input side,
    contain the input in its top-left corner, and be zero elsewhere.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    with pipe:
        in_shape = fn.cast(fn.random.uniform(range=(10, 20), shape=(ndim, )),
                           dtype=types.INT32)
        in_data = fn.reshape(fn.random.uniform(range=(0., 1.), shape=in_shape), layout="HW")
        shape = fn.shapes(in_data, dtype=types.INT32)
        h = fn.slice(shape, 0, 1, axes=[0])
        w = fn.slice(shape, 1, 1, axes=[0])
        side = math.max(h, w)
        if device == 'gpu':
            in_data = in_data.gpu()
        out_data = fn.pad(in_data, axis_names="HW", shape=fn.cat(side, side, axis=0))
        pipe.set_outputs(in_data, out_data)
    pipe.build()
    for _ in range(num_iter):
        # Bring GPU outputs back to the host so they can be compared with numpy.
        outs = [
            out.as_cpu() if isinstance(out, TensorListGPU) else out
            for out in pipe.run()
        ]
        for i in range(batch_size):
            in_data, out_data = [out.at(i) for out in outs]
            in_shape = in_data.shape
            max_side = max(in_shape)
            for s in out_data.shape:
                assert s == max_side
            np.testing.assert_equal(out_data[:in_shape[0], :in_shape[1]], in_data)
            np.testing.assert_equal(out_data[in_shape[0]:, :], 0)
            np.testing.assert_equal(out_data[:, in_shape[1]:], 0)
def make_pipe():
    """Build a CPU slice whose anchor and shape inputs are generated on the GPU.

    The data and the slice operator are on ``"cpu"`` while both random
    argument inputs are requested on ``"gpu"``.
    """
    gpu_uniform = dict(shape=(2, ), dtype=types.FLOAT, device="gpu")
    fake_data = fn.constant(idata=0, shape=[10, 10, 3], dtype=types.FLOAT, device="cpu")
    anchor = fn.random.uniform(range=[0.0, 0.3], **gpu_uniform)
    extent = fn.random.uniform(range=[0.4, 0.6], **gpu_uniform)
    return fn.slice(fake_data, anchor, extent, device="cpu")
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Return a pipeline resizing sequences on CPU and GPU, plus input/output sizes.

    Outputs, in order: CPU-resized data, GPU-resized data, the two spatial
    extents of the input shape, and the saved attributes of the CPU and GPU
    resize.
    """
    if channel_first:
        layout, hw_start = "FCHW", 2
    else:
        layout, hw_start = "FHWC", 1

    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        ext = fn.external_source(GetSequences(channel_first, seq_len, batch_size),
                                 layout=layout)
        common = dict(resize_x=w, resize_y=h, interp_type=interp, dtype=dtype,
                      save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **common)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(), minibatch_size=4, **common)

        # extract just HW part from the input shape
        full_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        ext_size = fn.slice(full_shape, hw_start, 2, axes=[0])

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe
def numpy_reader_roi_pipe(file_root, device="cpu", file_filter='*.npy',
                          roi_start=None, rel_roi_start=None,
                          roi_end=None, rel_roi_end=None,
                          roi_shape=None, rel_roi_shape=None,
                          roi_axes=None, default_axes=None,
                          out_of_bounds_policy=None, fill_value=None):
    """Read numpy files with an ROI and, for comparison, read fully and slice.

    Args:
        file_root, file_filter, device: Forwarded to both ``fn.readers.numpy``
            calls.
        roi_start, rel_roi_start, roi_end, rel_roi_end, roi_shape,
        rel_roi_shape, roi_axes: ROI description given to the reader and, under
            the corresponding ``fn.slice`` argument names, to the slice.
        default_axes: Axes used by ``fn.slice`` when ``roi_axes`` is falsy.
            ``None`` (the default) means an empty list.
        out_of_bounds_policy, fill_value: Forwarded to the reader and to the
            slice (as ``fill_values``).

    Returns:
        ``(roi_data, sliced_data)``: the ROI read and the full read + slice.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default; the
    # effective value is unchanged.
    if default_axes is None:
        default_axes = []

    data = fn.readers.numpy(device=device, file_root=file_root, file_filter=file_filter,
                            shard_id=0, num_shards=1, cache_header_information=False)
    roi_data = fn.readers.numpy(device=device, file_root=file_root, file_filter=file_filter,
                                roi_start=roi_start, rel_roi_start=rel_roi_start,
                                roi_end=roi_end, rel_roi_end=rel_roi_end,
                                roi_shape=roi_shape, rel_roi_shape=rel_roi_shape,
                                roi_axes=roi_axes,
                                out_of_bounds_policy=out_of_bounds_policy,
                                fill_value=fill_value,
                                shard_id=0, num_shards=1, cache_header_information=False)
    sliced_data = fn.slice(
        data,
        start=roi_start, rel_start=rel_roi_start,
        end=roi_end, rel_end=rel_roi_end,
        shape=roi_shape, rel_shape=rel_roi_shape,
        axes=roi_axes or default_axes,  # Slice has different default (axis_names="WH")
        out_of_bounds_policy=out_of_bounds_policy,
        fill_values=fill_value)
    return roi_data, sliced_data
def _test_empty_input(dim, device):
    """Check ``fn.resize`` on a batch where every second sample is empty.

    Every even-indexed sample is sliced with an all-zero relative shape.  The
    resized odd-indexed samples must match the resize of the untouched images
    and the even-indexed outputs must have zero volume.
    """
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)

    if dim == 2:
        files, _labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")
    else:
        images_cpu = dali.fn.external_source(
            source=random_3d_loader(batch_size), layout="DHWC")
    images = images_cpu.gpu() if device != "cpu" else images_cpu

    in_rel_shapes = np.ones([batch_size, dim], dtype=np.float32)
    in_rel_shapes[::2, :] *= 0  # all zeros in every second sample
    rel_shape_node = fn.external_source(lambda: in_rel_shapes)
    degenerate_images = fn.slice(images, np.zeros([dim]), rel_shape_node,
                                 axes=list(range(dim)))

    sizes = np.random.randint(20, 50, [batch_size, dim], dtype=np.int32)
    size_inp = fn.external_source(lambda: [row.astype(np.float32) for row in sizes])

    resize_no_empty = fn.resize(images, size=size_inp, mode="not_larger")
    resize_with_empty = fn.resize(degenerate_images, size=size_inp, mode="not_larger")

    pipe.set_outputs(resize_no_empty, resize_with_empty)
    pipe.build()

    for _ in range(3):
        out_no_empty, out_with_empty = pipe.run()
        if device == "gpu":
            out_no_empty = out_no_empty.as_cpu()
            out_with_empty = out_with_empty.as_cpu()
        for i in range(batch_size):
            degenerate = out_with_empty.at(i)
            if i % 2 == 0:
                # Even samples were sliced to zero extent.
                assert np.prod(degenerate.shape) == 0
            else:
                assert np.array_equal(out_no_empty.at(i), degenerate)
def check_slice_named_args(device, batch_size):
    """Check that every named-argument spelling of ``fn.slice`` yields the same window.

    The reference is the positional absolute slice (output 0).  It is compared
    with the positional relative slice and with all combinations of
    ``start``/``rel_start`` with ``shape``/``end``/``rel_shape``/``rel_end``,
    each given both as a numpy array and as a Python list.
    """
    test_data_shape = [5, 4, 3]

    def get_data():
        return [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")

        # The same quantities, once as Python lists and once as numpy arrays.
        in_shape_list = [5, 4]
        start_list = [1, 2]
        shape_list = [3, 1]
        end_list = [s + e for s, e in zip(start_list, shape_list)]
        rel_start_list = [s / full for s, full in zip(start_list, in_shape_list)]
        rel_shape_list = [e / full for e, full in zip(shape_list, in_shape_list)]
        rel_end_list = [e / full for e, full in zip(end_list, in_shape_list)]

        in_shape = np.array(in_shape_list)
        start = np.array(start_list)
        shape = np.array(shape_list)
        end = start + shape
        rel_start = start / in_shape
        rel_shape = shape / in_shape
        rel_end = end / in_shape

        spatial = (0, 1)
        outs = [
            fn.slice(data, start, shape, axes=spatial),
            fn.slice(data, rel_start, rel_shape, axes=spatial),
        ]

        for start_arg in (start, start_list):
            for shape_arg in (shape, shape_list):
                outs.append(fn.slice(data, start=start_arg, shape=shape_arg, axes=spatial))
            for end_arg in (end, end_list):
                outs.append(fn.slice(data, start=start_arg, end=end_arg, axes=spatial))

        for rel_start_arg in (rel_start, rel_start_list):
            for rel_shape_arg in (rel_shape, rel_shape_list):
                outs.append(fn.slice(data, rel_start=rel_start_arg,
                                     rel_shape=rel_shape_arg, axes=spatial))
            for rel_end_arg in (rel_end, rel_end_list):
                outs.append(fn.slice(data, rel_start=rel_start_arg,
                                     rel_end=rel_end_arg, axes=spatial))
            for shape_arg in (shape, shape_list):
                outs.append(fn.slice(data, rel_start=rel_start_arg,
                                     shape=shape_arg, axes=spatial))

        pipe.set_outputs(*outs)
    pipe.build()

    for _ in range(3):
        outs = pipe.run()
        for out_idx in range(1, len(outs)):
            for sample in range(batch_size):
                np.testing.assert_equal(np.array(outs[0][sample]),
                                        np.array(outs[out_idx][sample]))
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Return a pipeline resizing sequences on CPU and GPU, plus input/output sizes.

    Same outputs as the plain-integer variant, but the anchor and shape used to
    cut the spatial extents out of the input shape are passed as float32 CPU
    constants with ``normalized_anchor``/``normalized_shape`` disabled.
    """
    if channel_first:
        layout, hw_start = "FCHW", 2
    else:
        layout, hw_start = "FHWC", 1

    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        ext = fn.external_source(GetSequences(channel_first, seq_len, batch_size),
                                 layout=layout)
        common = dict(resize_x=w, resize_y=h, interp_type=interp, dtype=dtype,
                      save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **common)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(), minibatch_size=4, **common)

        # extract just HW part from the input shape
        shape_anchor = np.array([hw_start], dtype=np.float32)
        shape_shape = np.array([2], dtype=np.float32)
        full_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        ext_size = fn.slice(
            full_shape,
            types.Constant(shape_anchor, device="cpu"),
            types.Constant(shape_shape, device="cpu"),
            normalized_anchor=False,
            normalized_shape=False,
            axes=[0],
        )

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe
def dali_frame_splicing_graph(x, nfeatures, x_len, stacking=1, subsampling=1):
    """Stack shifted copies of ``x`` and/or subsample it.

    Stacking concatenates ``x`` with copies shifted by ``1 .. stacking - 1``
    along axis 1 (zero padded) on axis 0.  Subsampling is done with a
    nearest-neighbour ``warp_affine`` whose output size is
    ``(nfeatures, ceil(x_len / subsampling))``.
    """
    if stacking > 1:
        shifted = [
            fn.slice(x, offset, x_len, axes=(1, ), out_of_bounds_policy='pad', fill_values=0)
            for offset in range(1, stacking)
        ]
        x = fn.cat(x, *shifted, axis=0)
        nfeatures = nfeatures * stacking

    if subsampling > 1:
        out_len = (x_len + subsampling - 1) // subsampling
        scale_mt = fn.transforms.scale(scale=[subsampling, 1], center=[0.5, 0])
        # Layout required by WarpAffine
        x = fn.reshape(x, rel_shape=[1, 1, -1], layout="HWC")
        out_size = fn.cat(nfeatures, out_len)
        x = fn.warp_affine(x, matrix=scale_mt, size=out_size, interp_type=types.INTERP_NN)
        x = fn.reshape(x, rel_shape=[1, 1], layout="ft")

    return x
def test_slice_cpu():
    """Run a padded slice on a pipeline created with ``device_id=None``.

    Anchors and shapes are random float32 pairs in ``(0, 1]``; ``get_data``
    and ``batch_size`` come from the enclosing scope.
    """
    anch_shape = [2]

    def _random_fractions():
        # One random pair per sample, scaled from uint8 1..255 into (0, 1].
        return [
            (np.random.randint(1, 256, size=anch_shape, dtype=np.uint8) / 255)
            .astype(dtype=np.float32)
            for _ in range(batch_size)
        ]

    def get_anchors():
        return _random_fractions()

    def get_shape():
        return _random_fractions()

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    data = fn.external_source(source=get_data, layout="HWC")
    anchors = fn.external_source(source=get_anchors)
    shape = fn.external_source(source=get_shape)
    pipe.set_outputs(fn.slice(data, anchors, shape, out_of_bounds_policy="pad"))
    pipe.build()
    for _ in range(3):
        pipe.run()
def rnnt_train_pipe(files, sample_rate, pad_amount=0, preemph_coeff=.97,
                    window_size=.02, window_stride=.01, window="hann",
                    nfeatures=64, nfft=512,
                    frame_splicing_stack=1, frame_splicing_subsample=1,
                    lowfreq=0.0, highfreq=None, normalize_type='per_feature',
                    speed_perturb=False, silence_trim=False, device='cpu'):
    """Build the audio feature-extraction graph and return every intermediate stage.

    Stages: read and decode, optional speed perturbation and silence trimming,
    optional reflect padding, pre-emphasis, spectrogram, mel filter bank, log,
    optional frame splicing, and normalization.

    Returns:
        ``(norm_log_features, log_features_spliced, log_features, mel_spec,
        spec, preemph_audio, padded_audio, audio)``.
    """
    assert normalize_type in ('per_feature', 'all_features')
    norm_axes = [0, 1] if normalize_type != 'per_feature' else [1]
    on_gpu = device == 'gpu'

    win_len, win_hop = win_args(sample_rate, window_size, window_stride)
    window_fn = torch_windows.get(window, None)
    if window_fn:
        window_fn_arg = window_fn(win_len, periodic=False).numpy().tolist()
    else:
        window_fn_arg = None

    data, _ = fn.readers.file(files=files, device="cpu", random_shuffle=False,
                              shard_id=0, num_shards=1)
    audio, _ = fn.decoders.audio(data, dtype=types.FLOAT, downmix=True)

    # splicing with subsampling doesn't work if audio_len is a GPU data node
    if on_gpu and frame_splicing_subsample == 1:
        audio = audio.gpu()

    # Speed perturbation 0.85x - 1.15x
    if speed_perturb:
        target_sr_factor = fn.random.uniform(device="cpu", range=(1 / 1.15, 1 / 0.85))
        audio = fn.experimental.audio_resample(audio, scale=target_sr_factor)

    # Silence trimming
    if silence_trim:
        begin, length = fn.nonsilent_region(audio, cutoff_db=-80)
        audio = fn.slice(audio, begin, length, axes=[0])

    audio_shape = fn.shapes(audio, dtype=types.INT32)
    orig_audio_len = fn.slice(audio_shape, 0, 1, axes=(0, ))

    # If we couldn't move to GPU earlier, do it now
    if on_gpu and frame_splicing_subsample > 1:
        audio = audio.gpu()

    if pad_amount > 0:
        audio_len = orig_audio_len + 2 * pad_amount
        padded_audio = dali_reflect_pad_graph(audio, orig_audio_len, pad_amount)
    else:
        audio_len = orig_audio_len
        padded_audio = audio

    # Preemphasis filter
    preemph_audio = fn.preemphasis_filter(padded_audio, preemph_coeff=preemph_coeff,
                                          border='zero')

    # Spectrogram
    spec_len = audio_len // win_hop + 1
    spec = fn.spectrogram(preemph_audio, nfft=nfft, window_fn=window_fn_arg,
                          window_length=win_len, window_step=win_hop,
                          center_windows=True, reflect_padding=True)

    # Mel spectrogram
    mel_spec = fn.mel_filter_bank(spec, sample_rate=sample_rate, nfilter=nfeatures,
                                  freq_low=lowfreq, freq_high=highfreq)

    # Log
    log_features = fn.to_decibels(mel_spec + 1e-20, multiplier=np.log(10),
                                  reference=1.0, cutoff_db=-80)

    # Frame splicing
    needs_splicing = frame_splicing_stack > 1 or frame_splicing_subsample > 1
    if needs_splicing:
        log_features_spliced = dali_frame_splicing_graph(
            log_features, nfeatures, spec_len,
            stacking=frame_splicing_stack,
            subsampling=frame_splicing_subsample)
    else:
        log_features_spliced = log_features

    # Normalization
    if normalize_type:
        norm_log_features = fn.normalize(log_features_spliced, axes=norm_axes,
                                         device=device, epsilon=4e-5, ddof=1)
    else:
        norm_log_features = log_features_spliced

    return (norm_log_features, log_features_spliced, log_features, mel_spec,
            spec, preemph_audio, padded_audio, audio)
def check_bbox_random_crop_adjust_polygons(file_root, annotations_file, batch_size=3, num_iters=4, num_threads=4, device_id=0, seed=1234):
    """Check that polygon masks follow a random bounding-box crop.

    Builds a COCO pipeline that crops with ``fn.random_bbox_crop``, selects the
    masks of the surviving boxes and maps their vertices into the crop window.
    Each output sample is then compared with a numpy re-computation of the
    selection, the crop mapping, the clamping and the relative-to-absolute
    scaling.

    Args:
        file_root: Forwarded to ``fn.readers.coco`` as ``file_root``.
        annotations_file: Forwarded to ``fn.readers.coco`` as ``annotations_file``.
        batch_size: Samples per batch.
        num_iters: Number of pipeline runs to verify.
        num_threads: Forwarded to ``Pipeline``.
        device_id: Forwarded to ``Pipeline``.
        seed: Forwarded to ``Pipeline``.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=seed)
    with pipe:
        # Read data from COCO
        # ratio=True means both bboxes and masks coordinates will be
        # relative to the image dimensions (range [0.0, 1.0])
        inputs, in_bboxes, labels, in_polygons, in_vertices = \
            fn.readers.coco(
                file_root=file_root, annotations_file=annotations_file, shard_id=0, num_shards=1,
                ratio=True, ltrb=True, polygon_masks=True
            )

        # Generate a random crop. out_bboxes are adjusted to the crop window
        slice_anchor, slice_shape, out_bboxes, labels, bbox_indices = \
            fn.random_bbox_crop(
                in_bboxes, labels,
                aspect_ratio=[0.5, 2.0],
                thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                scaling=[0.3, 1.0],
                bbox_layout='xyXY',
                output_bbox_indices=True
            )
        # Crop the image
        _ = fn.decoders.image_slice(inputs, slice_anchor, slice_shape, device='mixed',
                                    axis_names='WH')

        sel_polygons, sel_vertices = fn.segmentation.select_masks(
            bbox_indices, in_polygons, in_vertices)

        # Adjust masks coordinates to the coordinate space of the cropped image
        MT = fn.transforms.crop(from_start=slice_anchor, from_end=(slice_anchor + slice_shape))
        out_vertices = fn.coord_transform(sel_vertices, MT=MT)

        # Converting to absolute coordinates (demo purposes)
        image_shape = fn.peek_image_shape(inputs, dtype=types.FLOAT)
        h = fn.slice(image_shape, 0, 1, axes=[0])
        w = fn.slice(image_shape, 1, 1, axes=[0])

        # Original bboxes
        bbox_x = fn.slice(in_bboxes, 0, 1, axes=[1])
        bbox_y = fn.slice(in_bboxes, 1, 1, axes=[1])
        bbox_X = fn.slice(in_bboxes, 2, 1, axes=[1])
        bbox_Y = fn.slice(in_bboxes, 3, 1, axes=[1])
        in_bboxes_abs = fn.cat(bbox_x * w, bbox_y * h, bbox_X * w, bbox_Y * h, axis=1)

        # Transform to convert relative coordinates to absolute
        scale_rel_to_abs = fn.transforms.scale(scale=fn.cat(w, h))

        # Selected vertices (relative coordinates)
        # NOTE(review): this transforms out_vertices, not sel_vertices, so it
        # duplicates out_vertices_abs below; the value is not checked later -
        # confirm which input was intended.
        sel_vertices_abs = fn.coord_transform(out_vertices, MT=scale_rel_to_abs)

        # Output bboxes
        bbox2_x = fn.slice(out_bboxes, 0, 1, axes=[1])
        bbox2_y = fn.slice(out_bboxes, 1, 1, axes=[1])
        bbox2_X = fn.slice(out_bboxes, 2, 1, axes=[1])
        bbox2_Y = fn.slice(out_bboxes, 3, 1, axes=[1])
        out_bboxes_abs = fn.cat(bbox2_x * w, bbox2_y * h, bbox2_X * w, bbox2_Y * h, axis=1)

        # Output vertices (absolute coordinates)
        out_vertices_abs = fn.coord_transform(out_vertices, MT=scale_rel_to_abs)

        # Clamped coordinates
        out_vertices_clamped = math.clamp(out_vertices, 0.0, 1.0)
        out_vertices_clamped_abs = fn.coord_transform(out_vertices_clamped, MT=scale_rel_to_abs)

    pipe.set_outputs(in_vertices, sel_vertices, sel_vertices_abs,
                     out_vertices, out_vertices_clamped, out_vertices_abs, out_vertices_clamped_abs,
                     in_bboxes, in_bboxes_abs, out_bboxes, out_bboxes_abs,
                     in_polygons, sel_polygons,
                     image_shape, slice_anchor, slice_shape, bbox_indices)
    pipe.build()
    # Enough iterations to see an example with more than one bounding box
    for i in range(num_iters):
        outs = pipe.run()
        for j in range(batch_size):
            # Unpack sample j of every output, in the order given to set_outputs.
            (in_vertices, sel_vertices, sel_vertices_abs,
             out_vertices, out_vertices_clamped, out_vertices_abs, out_vertices_clamped_abs,
             in_bboxes, in_bboxes_abs, out_bboxes, out_bboxes_abs,
             in_polygons, sel_polygons,
             image_shape, slice_anchor, slice_shape, bbox_indices) = (outs[k].at(j) for k in range(len(outs)))

            # Checking that the output polygon descriptors are the ones associated with the
            # selected bounding boxes
            expected_polygons_list = []
            expected_vertices_list = []
            ver_count = 0  # running vertex offset within the selected (compacted) vertices
            for k in range(in_polygons.shape[0]):
                # Each polygon row is read as (mask id, first vertex index, end vertex index).
                mask_id = in_polygons[k][0]
                in_ver_start_idx = in_polygons[k][1]
                in_ver_end_idx = in_polygons[k][2]
                pol_nver = in_ver_end_idx - in_ver_start_idx
                if mask_id in bbox_indices:
                    expected_polygons_list.append(
                        [mask_id, ver_count, ver_count + pol_nver])
                    # NOTE(review): this inner loop reuses the name `j` of the enclosing
                    # sample loop; `j` is not read again in this iteration, so it is harmless.
                    for j in range(in_ver_start_idx, in_ver_end_idx):
                        expected_vertices_list.append(in_vertices[j])
                    ver_count = ver_count + pol_nver
            expected_sel_polygons = np.array(expected_polygons_list)
            np.testing.assert_equal(expected_sel_polygons, sel_polygons)

            # Checking the selected vertices correspond to the selected masks
            expected_sel_vertices = np.array(expected_vertices_list)
            np.testing.assert_equal(expected_sel_vertices, sel_vertices)

            # Check that the vertices are correctly mapped to the cropping window
            expected_out_vertices = np.copy(expected_sel_vertices)
            crop_x, crop_y = slice_anchor
            crop_w, crop_h = slice_shape
            for v in range(expected_out_vertices.shape[0]):
                expected_out_vertices[v, 0] = (expected_out_vertices[v, 0] - crop_x) / crop_w
                expected_out_vertices[v, 1] = (expected_out_vertices[v, 1] - crop_y) / crop_h
            np.testing.assert_allclose(expected_out_vertices, out_vertices, rtol=1e-4)

            # Checking the conversion to absolute coordinates
            h, w, _ = image_shape
            wh = np.array([w, h])
            whwh = np.array([w, h, w, h])
            expected_out_vertices_abs = expected_out_vertices * wh
            np.testing.assert_allclose(expected_out_vertices_abs, out_vertices_abs, rtol=1e-4)

            # Checking clamping of the relative coordinates
            expected_out_vertices_clamped = np.clip(expected_out_vertices, a_min=0.0, a_max=1.0)
            np.testing.assert_allclose(expected_out_vertices_clamped, out_vertices_clamped, rtol=1e-4)

            # Checking clamping of the absolute coordinates
            expected_out_vertices_clamped_abs = np.clip(
                expected_out_vertices_abs, 0, wh)
            np.testing.assert_allclose(expected_out_vertices_clamped_abs, out_vertices_clamped_abs, rtol=1e-4)

            # Checking scaling of the bounding boxes
            expected_in_bboxes_abs = in_bboxes * whwh
            np.testing.assert_allclose(expected_in_bboxes_abs, in_bboxes_abs, rtol=1e-4)

            # Check box selection and mapping to the cropping window
            expected_out_bboxes = np.copy(in_bboxes[bbox_indices, :])
            for k in range(expected_out_bboxes.shape[0]):
                expected_out_bboxes[k, 0] = (expected_out_bboxes[k, 0] - crop_x) / crop_w
                expected_out_bboxes[k, 1] = (expected_out_bboxes[k, 1] - crop_y) / crop_h
                expected_out_bboxes[k, 2] = (expected_out_bboxes[k, 2] - crop_x) / crop_w
                expected_out_bboxes[k, 3] = (expected_out_bboxes[k, 3] - crop_y) / crop_h
            expected_out_bboxes = np.clip(expected_out_bboxes, a_min=0.0, a_max=1.0)
            np.testing.assert_allclose(expected_out_bboxes, out_bboxes, rtol=1e-4)

            expected_out_bboxes_abs = expected_out_bboxes * whwh
            np.testing.assert_allclose(expected_out_bboxes_abs, out_bboxes_abs, rtol=1e-4)