def zoom_fn(self, img, lbl):
    """Crop a scaled sub-volume from image and label, then resize both back.

    The scale comes from ``self.random_augmentation`` called with 0.15, a
    ``fn.random.uniform`` sample over ``range=(0.7, 1.0)`` and 1.0.
    Each entry of ``self.patch_size`` is multiplied by that scale to obtain
    the crop extent; when ``self.dim == 2`` the first extent is taken from
    ``self.patch_size[0]`` unscaled.

    Returns:
        ``(img, lbl)`` resized to ``self.crop_shape_float`` (cubic
        interpolation for the image, nearest-neighbour for the label).
    """
    zoom = self.random_augmentation(0.15, fn.random.uniform(range=(0.7, 1.0)), 1.0)
    depth, height, width = [zoom * extent for extent in self.patch_size]
    if self.dim == 2:
        # 2D mode: the leading axis keeps its full patch size.
        depth = self.patch_size[0]

    crop_extent = dict(crop_h=height, crop_w=width, crop_d=depth)
    img = fn.crop(img, **crop_extent)
    lbl = fn.crop(lbl, **crop_extent)

    target = self.crop_shape_float
    img = fn.resize(img, interp_type=types.DALIInterpType.INTERP_CUBIC, size=target)
    # Nearest-neighbour keeps label values from being blended.
    lbl = fn.resize(lbl, interp_type=types.DALIInterpType.INTERP_NN, size=target)
    return img, lbl
def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=False, is_training=True):
    """Define a read -> decode -> resize -> crop/mirror/normalize image graph.

    Args:
        data_dir: passed to the file reader as ``file_root``.
        crop: side length of the square crop (also the training resize target).
        size: resize target on the non-training path (``mode="not_smaller"``).
        shard_id: shard index handed to the reader.
        num_shards: total shard count handed to the reader.
        dali_cpu: if true, decode and resize on ``'cpu'``; otherwise the
            decoder runs as ``'mixed'`` and the resize on ``'gpu'``.
        is_training: enables reader shuffling, random-crop decoding and a
            random horizontal mirror.

    Returns:
        ``(images, labels)``; both are moved to the GPU before returning.
    """
    images, labels = fn.readers.file(
        file_root=data_dir,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=is_training,
        pad_last_batch=True,
        name="Reader",
    )

    if dali_cpu:
        dali_device = decoder_device = 'cpu'
    else:
        dali_device, decoder_device = 'gpu', 'mixed'

    # Memory padding is only requested when the mixed decoder is used.
    uses_mixed_decoder = decoder_device == 'mixed'
    device_memory_padding = 211025920 if uses_mixed_decoder else 0
    host_memory_padding = 140544512 if uses_mixed_decoder else 0

    if is_training:
        decoded = fn.decoders.image_random_crop(
            images,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
        images = fn.resize(
            decoded,
            device=dali_device,
            resize_x=crop,
            resize_y=crop,
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = fn.random.coin_flip(probability=0.5)
    else:
        decoded = fn.decoders.image(images, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            decoded,
            device=dali_device,
            size=size,
            mode="not_smaller",
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = False

    # Per-channel statistics expressed in the 0..255 range.
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        output_layout="CHW",
        crop=(crop, crop),
        mean=channel_mean,
        std=channel_std,
        mirror=mirror,
    )
    labels = labels.gpu()
    return images, labels
def zoom_fn(self, img, lbl):
    """Crop image and label to a scaled shape, then resize both back.

    The crop shape is ``self.crop_shape`` multiplied by the value returned
    from ``self.random_augmentation`` (called with 0.15, a ``fn.uniform``
    sample over ``range=(0.7, 1.0)`` and 1.0).

    Returns:
        ``(img, lbl)`` resized to ``self.crop_shape_float`` (cubic
        interpolation for the image, nearest-neighbour for the label).
    """
    zoom = self.random_augmentation(0.15, fn.uniform(range=(0.7, 1.0)), 1.0)
    resized_shape = self.crop_shape * zoom

    img = fn.crop(img, crop=resized_shape)
    lbl = fn.crop(lbl, crop=resized_shape)

    target = self.crop_shape_float
    img = fn.resize(img, interp_type=types.DALIInterpType.INTERP_CUBIC, size=target)
    # Nearest-neighbour keeps label values from being blended.
    lbl = fn.resize(lbl, interp_type=types.DALIInterpType.INTERP_NN, size=target)
    return img, lbl
def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=False, is_training=True):
    """Define a read -> decode -> resize -> crop/mirror/normalize image graph.

    Args:
        data_dir: passed to the file reader as ``file_root``.
        crop: side length of the square crop (also the training resize target).
        size: resize target on the non-training path (``mode="not_smaller"``).
        shard_id: shard index handed to the reader.
        num_shards: total shard count handed to the reader.
        dali_cpu: if true, decode and resize on ``'cpu'``; otherwise the
            decoder runs as ``'mixed'`` and the resize on ``'gpu'``.
        is_training: enables reader shuffling, random-crop decoding and a
            random horizontal mirror.

    Returns:
        ``(images, labels)``; both are moved to the GPU before returning.
    """
    images, labels = fn.readers.file(
        file_root=data_dir,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=is_training,
        pad_last_batch=True,
        name="Reader",
    )

    if dali_cpu:
        dali_device = decoder_device = 'cpu'
    else:
        dali_device, decoder_device = 'gpu', 'mixed'
    uses_mixed_decoder = decoder_device == 'mixed'

    # Ask nvJPEG to preallocate memory for the biggest sample in ImageNet
    # (CPU and GPU side) to avoid reallocations at runtime.
    device_memory_padding = 211025920 if uses_mixed_decoder else 0
    host_memory_padding = 140544512 if uses_mixed_decoder else 0
    # Ask the HW nvJPEG to allocate memory ahead of time for the biggest
    # image in the data set, again to avoid reallocations at runtime.
    preallocate_width_hint = 5980 if uses_mixed_decoder else 0
    preallocate_height_hint = 6430 if uses_mixed_decoder else 0

    if is_training:
        decoded = fn.decoders.image_random_crop(
            images,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            preallocate_width_hint=preallocate_width_hint,
            preallocate_height_hint=preallocate_height_hint,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
        images = fn.resize(
            decoded,
            device=dali_device,
            resize_x=crop,
            resize_y=crop,
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = fn.random.coin_flip(probability=0.5)
    else:
        decoded = fn.decoders.image(images, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            decoded,
            device=dali_device,
            size=size,
            mode="not_smaller",
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = False

    # Per-channel statistics expressed in the 0..255 range.
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        output_layout="CHW",
        crop=(crop, crop),
        mean=channel_mean,
        std=channel_std,
        mirror=mirror,
    )
    labels = labels.gpu()
    return images, labels
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Build a pipeline that resizes the same sequences on CPU and on GPU.

    Args:
        channel_first: selects the ``"FCHW"`` layout, otherwise ``"FHWC"``.
        seq_len: forwarded to ``GetSequences``.
        interp: interpolation type handed to both resize operators.
        dtype: output type handed to both resize operators.
        w: target width (``resize_x``).
        h: target height (``resize_y``).
        batch_size: pipeline batch size.

    Returns:
        The pipeline, with outputs: CPU result, GPU result, the H/W part of
        the input shape, and the attributes saved by each resize.
    """
    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        if channel_first:
            layout = "FCHW"
        else:
            layout = "FHWC"
        ext = fn.external_source(GetSequences(channel_first, seq_len, batch_size), layout=layout)

        shared_args = dict(resize_x=w, resize_y=h, interp_type=interp, dtype=dtype, save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **shared_args)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(), minibatch_size=4, **shared_args)

        # Extract just the HW part of the input shape: H sits at index 2 in
        # "FCHW" and at index 1 in "FHWC".
        hw_start = 2 if channel_first else 1
        input_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        ext_size = fn.slice(input_shape, hw_start, 2, axes=[0])

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe
def resize_dali(input, channel_first, dtype, interp, mode, size, w, h, d, roi_start, roi_end,
                minibatch_size, max_size):
    """Forward the given resize parameters to ``fn.resize``.

    ``channel_first`` is accepted but not used by this function.
    ``w``, ``h`` and ``d`` map to ``resize_x``, ``resize_y`` and ``resize_z``.

    Returns:
        The data node produced by ``fn.resize``.
    """
    resize_args = {
        "interp_type": interp,
        "dtype": dtype,
        "mode": mode,
        "resize_x": w,
        "resize_y": h,
        "resize_z": d,
        "size": size,
        "roi_start": roi_start,
        "roi_end": roi_end,
        "minibatch_size": minibatch_size,
        "max_size": max_size,
        # Disable subpixel scale so we can use PIL as reference.
        "subpixel_scale": False,
    }
    return fn.resize(input, **resize_args)
def test_separated_exec_setup():
    """Check that ``fn.dump_image`` passes resized images through unchanged.

    The pipeline is created with ``device_id=None`` and separate CPU/GPU
    prefetch queue depths; both outputs must be dense, equally shaped and
    element-wise identical.
    """
    batch_size = 128
    queue_depth = {"cpu_size": 5, "gpu_size": 3}
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=None,
                    prefetch_queue_depth=queue_depth)

    inputs, labels = fn.readers.caffe(path=caffe_dir, shard_id=0, num_shards=1)
    decoded = fn.decoders.image(inputs, output_type=types.RGB)
    images = fn.resize(decoded, resize_x=224, resize_y=224)
    images_cpu = fn.dump_image(images, suffix="cpu")
    pipe.set_outputs(images, images_cpu)
    pipe.build()

    out = pipe.run()
    a_raw = out[0]
    a_cpu = out[1]
    assert a_raw.is_dense_tensor()
    assert a_cpu.is_dense_tensor()
    assert a_raw.as_tensor().shape() == a_cpu.as_tensor().shape()

    for sample_idx in range(batch_size):
        t_raw = a_raw.at(sample_idx)
        t_cpu = a_cpu.at(sample_idx)
        # A zero sum of absolute differences means the samples are identical.
        assert np.sum(np.abs(t_cpu - t_raw)) == 0
def create_decoder_random_crop_pipeline(data_path, device):
    """Produce two random-crop variants of the same JPEGs with one shared seed.

    Variant 1 decodes with ``fn.decoders.image_random_crop`` and then resizes;
    variant 2 decodes the full image and applies ``fn.random_resized_crop``.
    Both random operators receive ``seed=1234``.

    Returns:
        ``(images_random_crop_1, images_random_crop_2)``.
    """
    seed = 1234
    target_size = (242, 230)  # passed as (w, h) in the original naming

    jpegs, _ = fn.readers.file(file_root=data_path, shard_id=0, num_shards=1, name="Reader")

    # Path 1: crop while decoding, then resize.
    cropped_on_decode = fn.decoders.image_random_crop(
        jpegs,
        device=device,
        output_type=types.RGB,
        hw_decoder_load=0.7,
        seed=seed,
    )
    images_random_crop_1 = fn.resize(cropped_on_decode, size=target_size)

    # Path 2: decode the whole image, then crop and resize in one operator.
    full_images = fn.decoders.image(jpegs, device=device, hw_decoder_load=0.7, output_type=types.RGB)
    images_random_crop_2 = fn.random_resized_crop(full_images, size=target_size, seed=seed)

    return images_random_crop_1, images_random_crop_2
def val_pipeline(cfg: ValLoaderConfig):
    """Define the validation graph: decode, resize shorter side, centre crop.

    Args:
        cfg: supplies ``image_size``, ``full_crop`` and ``num_classes``.

    Returns:
        ``(image, label)`` where the image is normalized float data in NCHW
        layout and the label is one-hot encoded and moved to the GPU.
    """
    jpeg, label = fn.readers.file(
        file_root=ROOT_DATA_DIR + "/val/",
        shard_id=env_rank(),
        num_shards=env_world_size(),
        name="Reader",
    )
    image = fn.decoders.image(jpeg, device="mixed", output_type=types.RGB)

    if cfg.full_crop:
        crop_size = cfg.image_size
    else:
        # Enlarge by 1.14x, add 8 and floor to a multiple of 16.
        crop_size = math.ceil((cfg.image_size * 1.14 + 8) // 16 * 16)

    image = fn.resize(
        image,
        device="gpu",
        interp_type=types.INTERP_TRIANGULAR,
        resize_shorter=crop_size,
    )
    image = fn.crop_mirror_normalize(
        image,
        device="gpu",
        crop=(cfg.image_size, cfg.image_size),
        mean=DATA_MEAN,
        std=DATA_STD,
        dtype=types.FLOAT,
        output_layout=types.NCHW,
    )

    one_hot_label = fn.one_hot(label, num_classes=cfg.num_classes)
    label = one_hot_label.gpu()
    return image, label
def get_pipeline(folder="train", custom_reader=None):
    """Build the augmentation pipeline and wrap it in a classification iterator.

    Args:
        folder: root directory for the file reader (ignored when
            ``custom_reader`` is given).
        custom_reader: optional ``(raw_files, labels)`` pair used instead of
            the built-in file reader.

    Returns:
        A ``DALIClassificationIterator`` over the built pipeline.
    """
    pipe = Pipeline(batch_size=64, num_threads=1, device_id=1)

    if custom_reader:
        raw_files, labels = custom_reader
    else:
        raw_files, labels = fn.file_reader(file_root="%s" % folder, random_shuffle=True)

    decode = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
    resize = fn.resize(
        decode,
        device="gpu",
        image_type=types.RGB,
        interp_type=types.INTERP_LINEAR,
        resize_x=WIDTH,
        resize_y=HEIGHT,
    )

    # Colour jitter; the random nodes are created in hue/saturation/value order.
    hue_shift = fn.uniform(range=(-10, 10))
    saturation_scale = fn.uniform(range=(-.5, .5))
    value_scale = fn.uniform(range=(0.9, 1.2))
    hsv = fn.hsv(
        resize,
        hue=hue_shift,
        saturation=saturation_scale,
        value=value_scale,
        device="gpu",
        dtype=types.UINT8,
    )

    brightness = fn.uniform(range=(.9, 1.1))
    bc = fn.brightness_contrast(hsv, device="gpu", brightness=brightness)

    cmn = fn.crop_mirror_normalize(
        bc,
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NHWC,
        image_type=types.GRAY,
        mean=[255 // 2],
        std=[255 // 2],
    )

    angle = fn.uniform(range=(-40, 40))
    rot = fn.rotate(cmn, angle=angle, device="gpu", keep_size=True)

    # Reshaping to a format PyTorch likes.
    tpose = fn.transpose(rot, perm=(2, 0, 1), device="gpu")

    pipe.set_outputs(tpose, labels)
    pipe.build()
    return DALIClassificationIterator([pipe], -1)
def _test_very_small_output(dim, device):
    """Resize to a near-zero size and expect every spatial extent to become 1.

    Args:
        dim: 2 reads images from the Caffe database; any other value uses the
            random 3D loader with layout ``"DHWC"``.
        device: ``"cpu"`` keeps data on the CPU, anything else moves it to GPU.
    """
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)

    if dim == 2:
        files, labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")
    else:
        images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size), layout="DHWC")

    if device == "cpu":
        images = images_cpu
    else:
        images = images_cpu.gpu()

    resize_tiny = fn.resize(images, size=1e-10)
    pipe.set_outputs(resize_tiny)
    pipe.build()

    # Expected shapes: 3D data keeps one channel, 2D images keep three.
    ref_size = [1, 1, 1, 1] if dim == 3 else [1, 1, 3]
    for _ in range(3):
        out, = pipe.run()
        for t in out:
            assert t.shape() == ref_size
def _test_empty_input(dim, device):
    """Check that resize handles batches where every second sample is empty.

    Even-indexed samples are sliced to zero extent before resizing and must
    come out with zero volume; odd-indexed samples must match the result of
    resizing the untouched input.

    Args:
        dim: 2 reads images from the Caffe database; any other value uses the
            random 3D loader with layout ``"DHWC"``.
        device: ``"cpu"`` keeps data on the CPU, anything else moves it to GPU.
    """
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)

    if dim == 2:
        files, labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")
    else:
        images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size), layout="DHWC")

    if device == "cpu":
        images = images_cpu
    else:
        images = images_cpu.gpu()

    # Relative slice shapes: all zeros in every second sample.
    in_rel_shapes = np.ones([batch_size, dim], dtype=np.float32)
    in_rel_shapes[::2, :] *= 0

    slice_anchor = np.zeros([dim])
    slice_shape = fn.external_source(lambda: in_rel_shapes)
    degenerate_images = fn.slice(images, slice_anchor, slice_shape, axes=list(range(dim)))

    sizes = np.random.randint(20, 50, [batch_size, dim], dtype=np.int32)
    size_inp = fn.external_source(lambda: [x.astype(np.float32) for x in sizes])

    resize_no_empty = fn.resize(images, size=size_inp, mode="not_larger")
    resize_with_empty = fn.resize(degenerate_images, size=size_inp, mode="not_larger")

    pipe.set_outputs(resize_no_empty, resize_with_empty)
    pipe.build()

    for _ in range(3):
        out_no_empty, out_with_empty = pipe.run()
        if device == "gpu":
            out_no_empty = out_no_empty.as_cpu()
            out_with_empty = out_with_empty.as_cpu()

        for i in range(batch_size):
            if i % 2 == 0:
                # Degenerate input must yield an empty output.
                assert np.prod(out_with_empty.at(i).shape) == 0
            else:
                assert np.array_equal(out_no_empty.at(i), out_with_empty.at(i))
def define_graph(self):
    """Define the COCO graph: random square crop, mask/vertex remap, resize.

    Reads images, boxes, labels and polygon masks, picks a random square crop
    whose side is a random fraction (0.3..1.0) of the shorter image side,
    filters boxes and masks to that crop, decodes only the cropped region,
    remaps polygon vertices into the crop's relative coordinates and resizes
    the image to ``self.input_dim`` with a randomly chosen interpolation.

    Returns:
        ``(images, targets, labels)``; targets and labels are moved to GPU.
    """
    inputs, bboxes, labels, polygons, vertices = fn.readers.coco(
        file_root=self.file_root,
        annotations_file=self.annotation_file,
        skip_empty=True,
        shard_id=self.share_id,
        num_shards=self.num_gpus,
        ratio=True,
        ltrb=True,
        polygon_masks=True,
        random_shuffle=self.random_shuffle,
        shuffle_after_epoch=self.shuffle_after_epoch,
        name="Reader")

    # Keep only the first two entries (H, W) of the peeked image shape.
    full_shape = fn.cast(fn.peek_image_shape(inputs), dtype=types.INT32)
    input_shape = fn.slice(full_shape, 0, 2, axes=[0])
    h = fn.slice(input_shape, 0, 1, axes=[0], dtype=types.FLOAT)
    w = fn.slice(input_shape, 1, 1, axes=[0], dtype=types.FLOAT)

    # Square crop whose side is a random fraction of the shorter image side.
    short_side = math.min(w, h)
    scale = fn.random.uniform(range=[0.3, 1.])
    crop_side = fn.cast(math.ceil(scale * short_side), dtype=types.INT32)
    crop_shape = fn.cat(crop_side, crop_side)

    anchor_rel, shape_rel, bboxes, labels, bbox_indices = fn.random_bbox_crop(
        bboxes,
        labels,
        input_shape=input_shape,
        crop_shape=crop_shape,
        shape_layout="HW",
        thresholds=[0.],              # no minimum intersection-over-union
        allow_no_crop=False,          # the no-crop option is disallowed
        seed=-1,                      # -1 leaves the seed unspecified (not a fixed seed) - confirm against DALI docs
        bbox_layout="xyXY",           # left, top, right, bottom
        output_bbox_indices=True,     # also return indices of the boxes that were kept
        total_num_attempts=1024,
    )

    # Drop the masks that belong to boxes removed by the crop.
    polygons, vertices = fn.segmentation.select_masks(bbox_indices, polygons, vertices)

    images = fn.decoders.image_slice(
        inputs,
        anchor_rel,
        shape_rel,
        normalized_anchor=False,
        normalized_shape=False,
        device='mixed')
    images = fn.color_space_conversion(images, image_type=types.RGB, output_type=types.BGR)

    # Two chained coordinate transforms for the polygon vertices.
    MT_1_vertices = fn.transforms.crop(
        to_start=(0.0, 0.0),
        to_end=fn.cat(w, h))
    MT_2_vertices = fn.transforms.crop(
        from_start=anchor_rel,
        from_end=(anchor_rel + shape_rel),
        to_start=(0.0, 0.0),
        to_end=(1., 1.))
    vertices = fn.coord_transform(vertices, MT=MT_1_vertices)
    vertices = fn.coord_transform(vertices, MT=MT_2_vertices)

    # One target row per box: box coordinates followed by 10 vertex values.
    flat_vertices = fn.reshape(vertices, shape=[-1, 10])
    targets = fn.cat(bboxes, flat_vertices, axis=1)

    # Pick the interpolation method at random for every sample.
    interp_methods = [
        types.INTERP_LINEAR,
        types.INTERP_CUBIC,
        types.INTERP_LANCZOS3,
        types.INTERP_GAUSSIAN,
        types.INTERP_NN,
        types.INTERP_TRIANGULAR,
    ]
    interp_codes = [int(x) for x in interp_methods]
    interp_method = fn.random.uniform(values=interp_codes, dtype=types.INT32)
    interp_method = fn.reinterpret(interp_method, dtype=types.INTERP_TYPE)

    images = fn.resize(images, dtype=types.FLOAT, size=self.input_dim, interp_type=interp_method)

    labels = labels.gpu()
    targets = targets.gpu()
    return (images, targets, labels)
def create_coco_pipeline(file_root, annotations_file, batch_size=1, device_id=0, num_threads=4,
                         local_rank=0, world_size=1):
    """Build a COCO pipeline with random bbox crop, colour jitter and flip.

    Args:
        file_root: image directory for the COCO reader.
        annotations_file: annotation JSON for the COCO reader.
        batch_size: pipeline batch size.
        device_id: only used to offset the pipeline seed (``42 + device_id``).
        num_threads: number of pipeline threads.
        local_rank: used both as the pipeline device id and as the reader's
            shard id.
        world_size: number of reader shards.

    Returns:
        The pipeline with outputs ``(images, bboxes, labels)``.
    """
    pipeline = Pipeline(batch_size, num_threads, local_rank, seed=42 + device_id)
    with pipeline:
        images, bboxes, labels = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            skip_empty=True,
            shard_id=local_rank,
            num_shards=world_size,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
            name="Reader",
        )

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
            bboxes,
            labels,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50,
        )

        # Decode only the selected crop region.
        images = fn.image_decoder_slice(images, crop_begin, crop_size, device="mixed",
                                        output_type=types.RGB)
        flip_coin = fn.coin_flip(probability=0.5)
        images = fn.resize(images, resize_x=300, resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # Use float to avoid clipping and quantizing the intermediate result.
        hue = fn.uniform(range=[-0.5, 0.5])
        saturation = fn.uniform(range=[0.5, 1.5])
        images = fn.hsv(images, dtype=types.FLOAT, hue=hue, saturation=saturation)

        brightness = fn.uniform(range=[0.875, 1.125])
        contrast = fn.uniform(range=[0.5, 1.5])
        images = fn.brightness_contrast(
            images,
            contrast_center=128,  # input is in float, but in 0..255 range
            dtype=types.UINT8,
            brightness=brightness,
            contrast=contrast,
        )

        # The same coin drives the box flip and the image mirror.
        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
        images = fn.crop_mirror_normalize(
            images,
            mean=[104., 117., 123.],
            std=[1., 1., 1.],
            mirror=flip_coin,
            dtype=types.FLOAT,
            output_layout="CHW",
            pad_output=False,
        )

        pipeline.set_outputs(images, bboxes, labels)
    return pipeline
def define_graph(self):
    """Read, decode, optionally resize to 101x43, cast and apply ``self.water``.

    Returns:
        The output of ``self.water``.
    """
    encoded, labels = self.input(name="Reader")
    images = self.decode(encoded)

    if self.prime_size:
        # Fixed 101 x 43 target, used when ``self.prime_size`` is set.
        images = fn.resize(images, resize_x=101, resize_y=43)

    casted = fn.cast(images, dtype=self.dtype)
    return self.water(casted)
def pipe(device, interp_type, test_data=checkerboard, out_size=out_size):
    """Resize constant test data using one filter for both min and mag.

    A dimension is inserted at axis 2 before the resize and removed again
    afterwards.

    Args:
        device: device on which the constant node is placed.
        interp_type: interpolation used as both ``min_filter`` and ``mag_filter``.
        test_data: array wrapped in a ``types.Constant``.
        out_size: target size for the resize.

    Returns:
        The resized float data with the added axis squeezed out.
    """
    source = types.Constant(test_data, device=device)
    with_extra_axis = fn.expand_dims(source, axes=[2])
    resized = fn.resize(
        with_extra_axis,
        dtype=types.FLOAT,
        min_filter=interp_type,
        mag_filter=interp_type,
        size=out_size,
    )
    return fn.squeeze(resized, axes=[2])
def rn50_pipeline_2(data_path):
    """Define a graph with random shorter-side resize, random crop and mirror.

    Args:
        data_path: root directory handed to the file reader.

    Returns:
        FLOAT16 images cropped to 224x224 and normalized with mean 128, std 1.
    """
    # Random parameter nodes are created first, in this order.
    crop_pos = fn.random.uniform(range=(0., 1.), shape=2)
    shorter_side = fn.random.uniform(range=(256., 480.))
    mirror = fn.random.coin_flip(probability=0.5)

    jpegs, _ = fn.readers.file(file_root=data_path)
    images = fn.decoders.image(jpegs, device='mixed', output_type=types.RGB)
    resized_images = fn.resize(
        images,
        device='gpu',
        interp_type=types.INTERP_LINEAR,
        resize_shorter=shorter_side,
    )
    return fn.crop_mirror_normalize(
        resized_images,
        device='gpu',
        dtype=types.FLOAT16,
        crop=(224, 224),
        mean=[128., 128., 128.],
        std=[1., 1., 1.],
        mirror=mirror,
        crop_pos_x=crop_pos[0],
        crop_pos_y=crop_pos[1],
    )
def define_graph(self):
    """Read, decode, optionally resize to 101x43, cast and apply ``self.water``.

    Data is moved to the GPU when ``self.device == 'gpu'``; a coin-flip mask
    (``seed=42``) is passed to ``self.water`` when ``self.do_mask`` is set,
    otherwise ``None`` is passed.

    Returns:
        The output of ``self.water``.
    """
    encoded, labels = self.input(name="Reader")
    images = self.decode(encoded)

    if self.device == 'gpu':
        images = images.gpu()
    if self.prime_size:
        # Fixed 101 x 43 target, used when ``self.prime_size`` is set.
        images = fn.resize(images, resize_x=101, resize_y=43)

    if self.do_mask:
        mask = fn.random.coin_flip(seed=42)
    else:
        mask = None

    casted = fn.cast(images, dtype=self.dtype)
    return self.water(casted, mask=mask)
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h, batch_size=2):
    """Build a pipeline that resizes the same sequences on CPU and on GPU.

    Args:
        channel_first: selects the ``"FCHW"`` layout, otherwise ``"FHWC"``.
        seq_len: forwarded to ``GetSequences``.
        interp: interpolation type handed to both resize operators.
        dtype: output type handed to both resize operators.
        w: target width (``resize_x``).
        h: target height (``resize_y``).
        batch_size: pipeline batch size.

    Returns:
        The pipeline, with outputs: CPU result, GPU result, the H/W part of
        the input shape, and the attributes saved by each resize.
    """
    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        if channel_first:
            layout = "FCHW"
        else:
            layout = "FHWC"
        ext = fn.external_source(GetSequences(channel_first, seq_len, batch_size), layout=layout)

        shared_args = dict(resize_x=w, resize_y=h, interp_type=interp, dtype=dtype, save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **shared_args)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(), minibatch_size=4, **shared_args)

        # Extract just the HW part of the input shape: H sits at index 2 in
        # "FCHW" and at index 1 in "FHWC". Anchor and shape are absolute
        # (non-normalized) values supplied as CPU constants.
        hw_start = 2 if channel_first else 1
        shape_anchor = np.array([hw_start], dtype=np.float32)
        shape_shape = np.array([2], dtype=np.float32)
        input_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        ext_size = fn.slice(
            input_shape,
            types.Constant(shape_anchor, device="cpu"),
            types.Constant(shape_shape, device="cpu"),
            normalized_anchor=False,
            normalized_shape=False,
            axes=[0],
        )

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size, size_cpu, size_gpu)
    return pipe
def setup_dali(
    image_file='/mnt/data/DATASETS/samples/images/image_110.jpg',
    image_dim=[800, 1600],
    batch_size=1,
    num_threads=4,
    device='mixed',
    device_id=0,
    output_dir='./out/',
):
    """Run the preprocessing pipeline on one image and save the result.

    The pipeline decodes ``image_file``, resizes it to fit inside
    ``image_dim`` (``mode="not_larger"``), zero-pads to ``image_dim``,
    transposes to CHW, casts to float and divides by 255. The first sample
    is then written to ``<output_dir>/dali_image.jpg``.

    Args:
        image_file: path of the image to read.
        image_dim: ``[height, width]`` target; only read, never mutated.
        batch_size: pipeline batch size.
        num_threads: number of pipeline threads.
        device: decoder device; ``'cpu'`` outputs are read directly, any
            other value is copied to the CPU first.
        device_id: GPU id for the pipeline.
        output_dir: directory that receives ``dali_image.jpg``.
    """
    # Create the output directory itself. ``os.path.dirname(output_dir)`` only
    # works when the path ends with a separator; otherwise it yields the
    # parent directory (or '' which makes ``os.makedirs`` raise).
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    pipeline = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=num_threads,
                                      device_id=device_id)
    with pipeline:
        data, _ = fn.file_reader(files=[image_file])

        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(images, size=image_dim, mode="not_larger", max_size=image_dim)
        images = fn.pad(images, fill_value=0, shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.

        # input shape
        input_shape = np.float32((image_dim[0], image_dim[1], 1))

        # original shape
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)

        # gather outputs
        out = [images, input_shape, shapes]
        pipeline.set_outputs(*out)

    pipeline.build()
    output = pipeline.run()

    img = output[0].at(0) if device == 'cpu' else output[0].as_cpu().at(0)
    img = img.transpose(1, 2, 0)  # CHW -> HWC
    img = img[:, :, ::-1]  # reverse channel order (BGR, as OpenCV expects)

    # A leftover debugging ``print(img); quit()`` used to terminate the
    # interpreter here, so the image was never written. The pipeline divided
    # the pixels by 255, so scale back to 8-bit before saving.
    img = np.clip(img * 255., 0, 255).astype(np.uint8)
    cv2.imwrite(os.path.join(output_dir, 'dali_image.jpg'), img)
def test_compose_change_device():
    """Check ``ops.Compose`` when consecutive operators use different devices.

    A CPU decoder followed by a GPU resize, composed, must give a GPU batch
    equal to the explicit ``decode -> .gpu() -> resize`` chain.
    """
    batch_size = 3
    pipe = Pipeline(batch_size, 1, 0)

    size = fn.random.uniform(shape=2, range=(300,500))
    decode_then_resize = ops.Compose([
        ops.decoders.Image(device="cpu"),
        ops.Resize(size=size, device="gpu"),
    ])
    files, labels = fn.readers.caffe(path=caffe_db_folder, seed=1)

    composed = decode_then_resize(files)
    reference = fn.resize(fn.decoders.image(files).gpu(), size=size)
    pipe.set_outputs(composed, reference)
    pipe.build()

    out = pipe.run()
    assert isinstance(out[0], dali.backend.TensorListGPU)
    test_utils.check_batch(out[0], out[1], batch_size=batch_size)
def get_image_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0, num_shards=1,
                       def_for_dataset=False):
    """Build the coco_dummy image pipeline and describe its outputs.

    ``def_for_dataset`` is accepted but not used by this function.

    Returns:
        ``(pipe, shapes, dtypes)`` where ``shapes`` and ``dtypes`` describe the
        three outputs: normalized images, reshaped image ids and int16 ids.
    """
    test_data_root = get_dali_extra_path()
    coco_dir = os.path.join(test_data_root, 'db', 'coco_dummy')
    file_root = os.path.join(coco_dir, 'images')
    annotations_file = os.path.join(coco_dir, 'instances.json')

    # A 'gpu' request maps to the mixed decoder backend.
    decoder_device = 'mixed' if device == 'gpu' else 'cpu'

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        jpegs, _, _, image_ids = fn.readers.coco(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True,
        )
        images = fn.decoders.image(jpegs, device=decoder_device, output_type=types.RGB)
        images = fn.resize(images, resize_x=224, resize_y=224, interp_type=types.INTERP_LINEAR)
        images = fn.crop_mirror_normalize(
            images,
            dtype=types.FLOAT,
            mean=[128., 128., 128.],
            std=[1., 1., 1.],
        )

        if device == 'gpu':
            image_ids = image_ids.gpu()
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)

        pipe.set_outputs(images, ids_reshaped, ids_int16)

    shapes = (
        (batch_size, 3, 224, 224),
        (batch_size, 1, 1),
        (batch_size, 1),
    )
    dtypes = (tf.float32, tf.int32, tf.int16)
    return pipe, shapes, dtypes
def get_pipeline(
    batch_size=4,
    in_size=None,
    out_size=None,
    even_paste_count=False,
    k=4,
    dtype=types.UINT8,
    no_intersections=True,
    full_input=False,
    in_anchor_top_left=False,
    in_anchor_range=None,
    out_anchor_top_left=False,
    out_anchor_range=None,
    use_gpu=False,
    num_out_of_bounds=0
):
    """Build a ``fn.multi_paste`` test pipeline together with its cut lists.

    Images are decoded on the CPU and resized to ``in_size`` (given as
    ``(height, width)``); ``prepare_cuts`` supplies the index, anchor and
    shape lists that are fed through external sources. Optional paste
    arguments are only passed when the matching flag is false.

    Returns:
        ``(pipe, in_idx_l, in_anchors_l, shapes_l, out_anchors_l)``; the
        pipeline outputs are the pasted result and the resized input.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0,
                    seed=np.random.randint(12345))
    with pipe:
        encoded, _ = fn.readers.file(file_root=img_dir)
        decoded = fn.decoders.image(encoded, device='cpu', output_type=types.RGB)
        resized = fn.resize(decoded, resize_x=in_size[1], resize_y=in_size[0])

        in_idx_l, in_anchors_l, shapes_l, out_anchors_l = prepare_cuts(
            k, batch_size, in_size, out_size, even_paste_count, no_intersections, full_input,
            in_anchor_top_left, in_anchor_range, out_anchor_top_left, out_anchor_range,
            num_out_of_bounds)

        in_idx = fn.external_source(lambda: in_idx_l)
        in_anchors = fn.external_source(lambda: in_anchors_l)
        shapes = fn.external_source(lambda: shapes_l)
        out_anchors = fn.external_source(lambda: out_anchors_l)

        kwargs = dict(in_ids=in_idx, output_size=out_size, dtype=dtype)
        if not full_input:
            kwargs["shapes"] = shapes
        if not in_anchor_top_left:
            kwargs["in_anchors"] = in_anchors
        if not out_anchor_top_left:
            kwargs["out_anchors"] = out_anchors

        paste_input = resized.gpu() if use_gpu else resized
        pasted = fn.multi_paste(paste_input, **kwargs)
        pipe.set_outputs(pasted, resized)
    return pipe, in_idx_l, in_anchors_l, shapes_l, out_anchors_l
def ExternalSourcePipeline(params, num_threads, device_id, external_date, seed):
    """Build a pipeline fed by an external source yielding (jpegs, labels).

    Images are decoded with the mixed backend, resized to 224x224, scaled by
    1/255, normalized with ``params.mean`` / ``params.std`` over axes 0 and 1
    and transposed with ``perm=[2, 0, 1]``.

    Args:
        params: supplies ``batch_size``, ``mean`` and ``std``.
        num_threads: number of pipeline threads.
        device_id: GPU id for the pipeline.
        external_date: source object passed to ``fn.external_source``.
        seed: pipeline seed.

    Returns:
        The pipeline with outputs ``(images, labels)``.
    """
    pipe = Pipeline(params.batch_size, num_threads, device_id, seed=seed)
    with pipe:
        jpegs, labels = fn.external_source(source=external_date, num_outputs=2)

        decoded = fn.image_decoder(jpegs, device="mixed", output_type=types.RGB)
        resized = fn.resize(decoded, resize_x=224, resize_y=224)
        scaled = fn.cast(resized, dtype=types.UINT8) / 255
        normalized = fn.normalize(
            scaled,
            axes=[0, 1],
            mean=params.mean,
            stddev=params.std,
            device='gpu',
            batch=False,
        )
        output = fn.transpose(normalized, perm=[2, 0, 1], device='gpu')

        pipe.set_outputs(output, labels)
    return pipe
def get_pipeline(batch_size, num_threads, device, device_id=0, shard_id=0, num_shards=1):
    """Build the coco_dummy image pipeline rooted at ``$DALI_EXTRA_PATH``.

    Returns:
        The pipeline with three outputs: normalized 224x224 images, image ids
        reshaped to ``[1, 1]`` and image ids cast to int16.

    Raises:
        KeyError: if the ``DALI_EXTRA_PATH`` environment variable is not set.
    """
    test_data_root = os.environ['DALI_EXTRA_PATH']
    coco_dir = os.path.join(test_data_root, 'db', 'coco_dummy')
    file_root = os.path.join(coco_dir, 'images')
    annotations_file = os.path.join(coco_dir, 'instances.json')

    # A 'gpu' request maps to the mixed decoder backend.
    decoder_device = 'mixed' if device == 'gpu' else 'cpu'

    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        jpegs, _, _, image_ids = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=False,
            image_ids=True,
        )
        images = fn.image_decoder(jpegs, device=decoder_device, output_type=types.RGB)
        images = fn.resize(images, resize_x=224, resize_y=224, interp_type=types.INTERP_LINEAR)
        images = fn.crop_mirror_normalize(
            images,
            dtype=types.FLOAT,
            mean=[128., 128., 128.],
            std=[1., 1., 1.],
        )

        if device == 'gpu':
            image_ids = image_ids.gpu()
        ids_reshaped = fn.reshape(image_ids, shape=[1, 1])
        ids_int16 = fn.cast(image_ids, dtype=types.INT16)

        pipe.set_outputs(images, ids_reshaped, ids_int16)
    return pipe
def get_pipeline(batch_size=4, in_size=None, out_size=None, even_paste_count=False, k=4,
                 dtype=types.UINT8, no_intersections=True, full_input=False,
                 in_anchor_top_left=False, out_anchor_top_left=False):
    """Build a CPU-only ``fn.multi_paste`` test pipeline with its cut lists.

    Images are decoded on the CPU and resized to ``in_size`` (given as
    ``(height, width)``); ``prepare_cuts`` supplies the index, anchor and
    shape lists that are fed through external sources. Optional paste
    arguments are only passed when the matching flag is false.

    Returns:
        ``(pipe, in_idx_l, in_anchors_l, shapes_l, out_anchors_l)``; the
        pipeline outputs are the pasted result and the resized input.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=types.CPU_ONLY_DEVICE_ID)
    with pipe:
        encoded, _ = fn.file_reader(file_root=img_dir)
        decoded = fn.image_decoder(encoded, device='cpu', output_type=types.RGB)
        resized = fn.resize(decoded, resize_x=in_size[1], resize_y=in_size[0])

        in_idx_l, in_anchors_l, shapes_l, out_anchors_l = prepare_cuts(
            k, batch_size, in_size, out_size, even_paste_count, no_intersections, full_input,
            in_anchor_top_left, out_anchor_top_left)

        in_idx = fn.external_source(lambda: in_idx_l)
        in_anchors = fn.external_source(lambda: in_anchors_l)
        shapes = fn.external_source(lambda: shapes_l)
        out_anchors = fn.external_source(lambda: out_anchors_l)

        kwargs = dict(in_ids=in_idx, output_size=out_size, dtype=dtype)
        if not full_input:
            kwargs["shapes"] = shapes
        if not in_anchor_top_left:
            kwargs["in_anchors"] = in_anchors
        if not out_anchor_top_left:
            kwargs["out_anchors"] = out_anchors

        pasted = fn.multi_paste(resized, **kwargs)
        pipe.set_outputs(pasted, resized)
    return pipe, in_idx_l, in_anchors_l, shapes_l, out_anchors_l
def setup_dali(
    input_name='DALI_INPUT_0',
    image_dim=[896, 1536],
    batch_size=1,
    num_threads=4,
    device='cpu',
    device_id=0,
    output_dir='./out/',
):
    """Define the preprocessing pipeline and serialize it to ``model.dali``.

    The pipeline takes encoded images from an external source named
    ``input_name``, decodes them, resizes them to fit inside ``image_dim``
    (``mode="not_larger"``), zero-pads to ``image_dim``, transposes to CHW,
    casts to float and divides by 255. Its outputs are the images, the
    constant input shape and the original image shape.

    Args:
        input_name: name of the external-source input.
        image_dim: ``[height, width]`` target; only read, never mutated.
        batch_size: pipeline batch size.
        num_threads: number of pipeline threads.
        device: decoder device.
        device_id: GPU id for the pipeline.
        output_dir: directory that receives ``model.dali``.
    """
    pipeline = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=num_threads,
                                      device_id=device_id)
    with pipeline:
        data = fn.external_source(name=input_name, device="cpu")

        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(images, size=image_dim, mode="not_larger", max_size=image_dim)
        images = fn.pad(images, fill_value=0, shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.

        # input shape
        input_shape = np.float32((image_dim[0], image_dim[1], 1))

        # original shape
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)

        # gather outputs
        out = [images, input_shape, shapes]
        pipeline.set_outputs(*out)

    # Create the output directory itself. ``os.path.dirname(output_dir)`` only
    # works when the path ends with a separator; for e.g. 'out' it yields ''
    # (``os.makedirs`` raises) and for 'a/b' it creates only 'a', so the
    # serialize call below would fail.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    pipeline.serialize(filename=os.path.join(output_dir, 'model.dali'))
def RN50Pipeline():
    """Define a random-crop decode -> 224x224 resize -> normalize graph.

    Settings are read from the module-level ``args`` (``device``,
    ``images_dir``, ``hw_load``, ``width_hint``, ``height_hint``).

    Returns:
        FLOAT16 images in NCHW layout, randomly mirrored.
    """
    # A 'gpu' request maps to the mixed decoder backend.
    if args.device == 'gpu':
        device = 'mixed'
    else:
        device = 'cpu'

    jpegs, _ = fn.readers.file(file_root=args.images_dir)
    images = fn.decoders.image_random_crop(
        jpegs,
        device=device,
        output_type=types.RGB,
        hw_decoder_load=args.hw_load,
        preallocate_width_hint=args.width_hint,
        preallocate_height_hint=args.height_hint,
    )
    images = fn.resize(images, resize_x=224, resize_y=224)

    coin_flip = fn.random.coin_flip(probability=0.5)
    # Per-channel statistics expressed in the 0..255 range.
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    return fn.crop_mirror_normalize(
        images,
        dtype=types.FLOAT16,
        output_layout=types.NCHW,
        crop=(224, 224),
        mean=channel_mean,
        std=channel_std,
        mirror=coin_flip,
    )
def create_coco_pipeline(default_boxes, args):
    """Define the COCO training graph with box encoding.

    Reads COCO samples, applies a random bbox crop, decodes only the cropped
    region, resizes to 300x300, applies HSV and brightness/contrast jitter,
    random horizontal flip, normalization and anchor-based box encoding.

    Args:
        default_boxes: provides the anchors via ``as_ltrb_list()``.
        args: supplies ``train_coco_root``, ``train_annotate`` and ``fp16``.

    Returns:
        ``(images, bboxes, labels)``; boxes and labels are moved to the GPU.
    """
    # Shard by distributed rank when a process group exists; otherwise read
    # the whole dataset as a single shard. Checking ``is_initialized()`` does
    # not depend on which exception type ``get_rank()`` raises when no
    # process group has been set up.
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    else:
        shard_id = 0
        num_shards = 1

    images, bboxes, labels = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader")

    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes,
        labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50)

    # Decode only the selected crop region. ``fn.decoders.image_slice`` is the
    # current name of the deprecated ``fn.image_decoder_slice`` alias.
    images = fn.decoders.image_slice(images, crop_begin, crop_size, device="mixed",
                                     output_type=types.RGB)
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(images, resize_x=300, resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    # ``fn.random.uniform`` replaces the deprecated ``fn.uniform`` alias and
    # matches the ``fn.random.coin_flip`` call above.
    saturation = fn.random.uniform(range=[0.5, 1.5])
    contrast = fn.random.uniform(range=[0.5, 1.5])
    brightness = fn.random.uniform(range=[0.875, 1.125])
    hue = fn.random.uniform(range=[-0.5, 0.5])

    # Use float to avoid clipping and quantizing the intermediate result.
    images = fn.hsv(images, dtype=types.FLOAT, hue=hue, saturation=saturation)
    images = fn.brightness_contrast(
        images,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast)

    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    # The same coin drives the box flip and the image mirror.
    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(
        images,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=dtype,
        output_layout="CHW",
        pad_output=False)

    bboxes, labels = fn.box_encoder(bboxes, labels, criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    labels = labels.gpu()
    bboxes = bboxes.gpu()
    return images, bboxes, labels
def create_image_pipeline(
    batch_size,
    num_threads,
    device_id,
    image0_list,
    image1_list,
    flow_list,
    valBool,
):
    """Build a pipeline yielding a stacked image pair and its flow field.

    Two image lists and one numpy flow list are read from ``args.data`` with
    identical shuffle settings and seed. The two decoded images are
    concatenated along axis 2. For validation the image stack is resized to
    162x122; otherwise image stack and flow each go through
    ``fn.random_resized_crop`` with the same parameters and seed.

    Args:
        batch_size: pipeline batch size.
        num_threads: number of pipeline threads.
        device_id: GPU id for the pipeline.
        image0_list: files for the first image of each pair.
        image1_list: files for the second image of each pair.
        flow_list: numpy files holding the flow.
        valBool: true for validation (no shuffling, plain resize).

    Returns:
        The pipeline with outputs ``(images, flo)``.
    """
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=2)
    with pipeline:
        # Shuffle only when not validating.
        shuffleBool = not valBool

        # --- READ FILES ---
        # All three readers share root, shuffle flag and seed.
        reader_args = dict(file_root=args.data, random_shuffle=shuffleBool, seed=1)
        image0, _ = fn.readers.file(files=image0_list, name="Reader", **reader_args)
        image1, _ = fn.readers.file(files=image1_list, **reader_args)
        flo = fn.readers.numpy(files=flow_list, **reader_args)

        # --- DECODE AND RESHAPE ---
        image0 = fn.reshape(fn.decoders.image(image0, device="cpu"), layout="HWC")
        image1 = fn.reshape(fn.decoders.image(image1, device="cpu"), layout="HWC")
        images = fn.cat(image0, image1, axis=2)
        flo = fn.reshape(flo, layout="HWC")

        if valBool:
            images = fn.resize(images, resize_x=162, resize_y=122)
        else:
            # --- CO-TRANSFORM ---
            # Both crops use the same parameters and seed.
            crop_args = dict(
                size=[122, 162],
                random_aspect_ratio=[1.3, 1.4],
                random_area=[0.8, 0.9],
                seed=1,
            )
            images = fn.random_resized_crop(images, **crop_args)
            flo = fn.random_resized_crop(flo, **crop_args)
            # NOTE: a random rotation and random horizontal/vertical flips
            # (with matching sign changes of the two flow channels) were
            # sketched here as commented-out code and are currently disabled.

        # --- NORMALIZE ---
        # First scale the six image channels by 1/255, then subtract the
        # per-channel mean (repeated for both images of the pair).
        images = fn.crop_mirror_normalize(images, mean=[0] * 6, std=[255] * 6)
        images = fn.crop_mirror_normalize(
            images,
            mean=[0.45, 0.432, 0.411] * 2,
            std=[1] * 6,
        )
        flo = fn.crop_mirror_normalize(flo, mean=[0, 0], std=[args.div_flow] * 2)

        pipeline.set_outputs(images, flo)
    return pipeline