def flips_fn(self, img, lbl):
    """Apply the same random flips to an image and its label.

    A separate ``fn.coin_flip(probability=0.33)`` node is created for the
    horizontal and vertical axes, plus a depthwise one when ``self.dim == 3``.
    The very same nodes are passed to both ``fn.flip`` calls, so ``img`` and
    ``lbl`` receive identical flip arguments.

    Args:
        img: Image data node.
        lbl: Label data node.

    Returns:
        Tuple ``(flipped_img, flipped_lbl)``.
    """
    flip_args = dict(
        horizontal=fn.coin_flip(probability=0.33),
        vertical=fn.coin_flip(probability=0.33),
    )
    if self.dim == 3:
        flip_args["depthwise"] = fn.coin_flip(probability=0.33)

    flipped_img = fn.flip(img, **flip_args)
    flipped_lbl = fn.flip(lbl, **flip_args)
    return flipped_img, flipped_lbl
def check_per_sample_gaussian_blur(batch_size, sigma_dim, window_size_dim, shape, layout, axes, op_type="cpu"):
    """Compare ``fn.gaussian_blur`` with per-sample arguments against a baseline.

    A pipeline is built that feeds randomly shaped data into
    ``fn.gaussian_blur`` with per-sample ``sigma`` and/or ``window_size``
    argument inputs. The pipeline is run ``test_iters`` times and each batch
    is compared with ``gaussian_baseline`` through ``check_batch``.

    Args:
        batch_size: Batch size of the pipeline.
        sigma_dim: Number of sigma values generated per sample, or ``None``
            to leave ``sigma`` unspecified for the operator.
        window_size_dim: Number of window sizes generated per sample, or
            ``None`` to leave ``window_size`` unspecified for the operator.
        shape: Maximum shape passed to ``RandomlyShapedDataIterator``.
        layout: Layout string given to the external source and expected on
            the output.
        axes: Forwarded to ``gaussian_baseline``.
        op_type: ``"cpu"`` or ``"gpu"``; selects the operator device.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    data = RandomlyShapedDataIterator(batch_size, max_shape=shape)
    with pipe:
        if sigma_dim is not None:
            sigma = fn.random.uniform(range=[0.5, 3], shape=[sigma_dim])
            sigma_arg = sigma
        else:
            # placeholder, so we can return something
            sigma = fn.coin_flip(probability=0)
            sigma_arg = None

        if window_size_dim is not None:
            window_radius = fn.random.uniform(range=[5, 10], shape=[window_size_dim])
            # radius * 2 + 1 always yields an odd window size
            window_size = fn.cast(window_radius, dtype=types.INT32) * 2 + 1
            window_arg = window_size
        else:
            # placeholder, so we can return something
            window_size = fn.coin_flip(probability=0)
            window_arg = None

        images = fn.external_source(data, layout=layout)
        if op_type == "gpu":
            images = images.gpu()
        blurred = fn.gaussian_blur(images, device=op_type, sigma=sigma_arg, window_size=window_arg)
        pipe.set_outputs(blurred, images, sigma, window_size)
    pipe.build()

    # Depends only on the layout, so compute it once instead of per sample.
    skip_axes = count_skip_axes(layout)

    for _ in range(test_iters):
        result, images_out, sigma_out, window_out = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            images_out = images_out.as_cpu()
        images_out = to_batch(images_out, batch_size)
        sigma_out = to_batch(sigma_out, batch_size)
        window_out = to_batch(window_out, batch_size)

        baseline = []
        for i in range(batch_size):
            # The pipeline always returns something for sigma / window size
            # (a placeholder when the argument was not used), so the decision
            # must be based on the *_dim parameters, not on the outputs.
            sample_sigma = sigma_out[i] if sigma_dim is not None else None
            sample_window = window_out[i] if window_size_dim is not None else None
            baseline.append(
                gaussian_baseline(images_out[i], sample_sigma, sample_window, axes, skip_axes))
        check_batch(result, baseline, batch_size, max_allowed_error=1, expected_layout=layout)
def pipe(max_batch_size, input_data, device):
    """Create a pipeline that randomly flips externally supplied data.

    Three ``fn.coin_flip()`` nodes (depthwise, horizontal, vertical) drive a
    single ``fn.flip`` applied to an ``fn.external_source`` reading from
    ``input_data`` with ``cycle=False``. The pipeline is returned without
    ``build()`` having been called.

    Args:
        max_batch_size: Batch size passed to ``Pipeline``.
        input_data: Source handed to ``fn.external_source``.
        device: Device for the external source.

    Returns:
        The configured ``Pipeline``.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)

    # Created in the same order as the keyword arguments they feed.
    flip_flags = {}
    for axis_name in ("depthwise", "horizontal", "vertical"):
        flip_flags[axis_name] = fn.coin_flip()

    source = fn.external_source(source=input_data, cycle=False, device=device)
    flipped = fn.flip(source, **flip_flags)
    pipeline.set_outputs(flipped)
    return pipeline
def create_coco_pipeline(file_root, annotations_file, batch_size=1, device_id=0, num_threads=4, local_rank=0, world_size=1):
    """Build a COCO augmentation pipeline producing images, boxes and labels.

    Stages, in order: COCO reader, random bbox crop, sliced image decoding,
    resize to 300x300, HSV and brightness/contrast jitter, horizontal box
    flip and crop-mirror-normalize sharing one coin flip.

    Note that ``local_rank`` is what gets passed as the third positional
    argument of ``Pipeline`` and as the reader's ``shard_id``; ``device_id``
    is only used to offset the seed.

    Args:
        file_root: Forwarded to ``fn.coco_reader``.
        annotations_file: Forwarded to ``fn.coco_reader``.
        batch_size: First positional argument of ``Pipeline``.
        device_id: Added to 42 to form the pipeline seed.
        num_threads: Second positional argument of ``Pipeline``.
        local_rank: Third positional argument of ``Pipeline`` and reader shard id.
        world_size: Number of reader shards.

    Returns:
        The configured ``Pipeline`` (``build()`` is not called here).
    """
    pipeline = Pipeline(batch_size, num_threads, local_rank, seed=42 + device_id)

    with pipeline:
        images, bboxes, labels = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            skip_empty=True,
            shard_id=local_rank,
            num_shards=world_size,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
            name="Reader",
        )

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
            bboxes,
            labels,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50,
        )

        images = fn.image_decoder_slice(
            images, crop_begin, crop_size, device="mixed", output_type=types.RGB)

        # One coin flip shared by the bbox flip and the image mirror below.
        flip_coin = fn.coin_flip(probability=0.5)

        images = fn.resize(
            images,
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
        )

        # use float to avoid clipping and quantizing the intermediate result
        hue_shift = fn.uniform(range=[-0.5, 0.5])
        saturation_scale = fn.uniform(range=[0.5, 1.5])
        images = fn.hsv(
            images, dtype=types.FLOAT, hue=hue_shift, saturation=saturation_scale)

        brightness_scale = fn.uniform(range=[0.875, 1.125])
        contrast_scale = fn.uniform(range=[0.5, 1.5])
        images = fn.brightness_contrast(
            images,
            contrast_center=128,  # input is in float, but in 0..255 range
            dtype=types.UINT8,
            brightness=brightness_scale,
            contrast=contrast_scale,
        )

        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)

        images = fn.crop_mirror_normalize(
            images,
            mean=[104., 117., 123.],
            std=[1., 1., 1.],
            mirror=flip_coin,
            dtype=types.FLOAT,
            output_layout="CHW",
            pad_output=False,
        )

        pipeline.set_outputs(images, bboxes, labels)

    return pipeline
def crop_fn(self, img, lbl):
    """Crop ``img`` and ``lbl`` around a randomly selected label pixel.

    A pixel is drawn with ``fn.segmentation.random_mask_pixel``; its
    ``foreground`` argument is a coin flip with probability
    ``self.oversampling``. The crop anchor is the pixel minus half of
    ``self.crop_shape``, clamped to ``[0, label_shape - crop_shape]``. Both
    inputs are moved to the GPU and sliced with the same anchor and shape.

    Args:
        img: Image data node.
        lbl: Label data node (also used as the mask for pixel selection).

    Returns:
        Tuple ``(img, lbl)`` of the cropped GPU nodes.
    """
    pick_foreground = fn.coin_flip(probability=self.oversampling)
    center = fn.segmentation.random_mask_pixel(lbl, foreground=pick_foreground)

    # Anchor such that the chosen pixel is at the middle of the crop.
    centered_anchor = self.slice_fn(center, 1, self.dim) - self.crop_shape // 2
    lower_clamped = math.max(0, centered_anchor)

    # Largest anchor for which the crop still ends inside the label extent.
    upper_limit = self.slice_fn(fn.shapes(lbl), 1, self.dim) - self.crop_shape
    crop_anchor = math.min(lower_clamped, upper_limit)

    slice_opts = dict(axis_names=self.axis_name, out_of_bounds_policy="pad")
    img = fn.slice(img.gpu(), crop_anchor, self.crop_shape, **slice_opts)
    lbl = fn.slice(lbl.gpu(), crop_anchor, self.crop_shape, **slice_opts)
    return img, lbl
def crop_fn(self, img, lbl):
    """Crop ``img`` and ``lbl`` around a randomly selected label pixel.

    Both the coin flip (probability ``self.oversampling``) and the
    ``random_mask_pixel`` call receive ``self.aug_seed_kwargs``. The anchor is
    the selected pixel minus half of ``self.crop_shape``, clamped to
    ``[0, label_shape - crop_shape]``, and both inputs are sliced along the
    ``"DHW"`` axes with padding for out-of-bounds regions.

    Args:
        img: Image data node.
        lbl: Label data node (also used as the mask for pixel selection).

    Returns:
        Tuple ``(img, lbl)`` of the cropped nodes.
    """
    pick_foreground = fn.coin_flip(
        probability=self.oversampling, **self.aug_seed_kwargs)
    center = fn.segmentation.random_mask_pixel(
        lbl, foreground=pick_foreground, **self.aug_seed_kwargs)

    # Anchor such that the chosen pixel is at the middle of the crop.
    centered_anchor = self.slice_fn(center) - self.crop_shape // 2
    lower_clamped = math.max(0, centered_anchor)

    # Largest anchor for which the crop still ends inside the label extent.
    upper_limit = self.slice_fn(fn.shapes(lbl)) - self.crop_shape
    crop_anchor = math.min(lower_clamped, upper_limit)

    slice_opts = dict(axis_names="DHW", out_of_bounds_policy="pad")
    img = fn.slice(img, crop_anchor, self.crop_shape, **slice_opts)
    lbl = fn.slice(lbl, crop_anchor, self.crop_shape, **slice_opts)
    return img, lbl
def check_random_mask_pixel(ndim=2, batch_size=3, min_extent=20, max_extent=50):
    """Exercise ``fn.segmentation.random_mask_pixel`` on random integer masks.

    The pipeline generates a mask of random shape, selects pixels with several
    ``random_mask_pixel`` configurations, and derives a clamped slice anchor
    from one of them. Three batches are run and each sample is checked:
    selected foreground pixels satisfy their criterion, the anchor keeps the
    crop inside the mask, and the sliced mask has the requested shape.

    Args:
        ndim: Number of mask dimensions.
        batch_size: Batch size of the pipeline.
        min_extent: Lower bound of the uniform range used for each extent.
        max_extent: ``max_extent + 1`` is the upper bound of that range.
    """
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        # Input mask
        extent_nodes = []
        for _ in range(ndim):
            extent = fn.random.uniform(
                range=(min_extent, max_extent + 1), shape=(1,), device='cpu')
            extent_nodes.append(fn.cast(extent, dtype=types.INT32))
        in_shape = fn.cat(*extent_nodes, axis=0)

        raw_mask = fn.random.uniform(range=(0, 2), device='cpu', shape=in_shape)
        in_mask = fn.cast(raw_mask, dtype=types.INT32)

        random_pixel = fn.segmentation.random_mask_pixel
        fg_pixel1 = random_pixel(in_mask, foreground=1)  # > 0
        fg_pixel2 = random_pixel(in_mask, foreground=1, threshold=0.99)  # > 0.99
        fg_pixel3 = random_pixel(in_mask, foreground=1, value=2)  # == 2
        rnd_pixel = random_pixel(in_mask, foreground=0)
        coin_flip = fn.coin_flip(probability=0.7)
        fg_biased = random_pixel(in_mask, foreground=coin_flip)

        # Demo purposes: Taking a random pixel and produce a valid anchor to feed slice
        # We want to force the center adjustment, therefore the large crop shape
        crop_shape = in_shape - 2
        anchor = fn.cast(fg_pixel1, dtype=types.INT32) - crop_shape // 2
        anchor = math.min(math.max(0, anchor), in_shape - crop_shape)
        out_mask = fn.slice(in_mask, anchor, crop_shape, axes=tuple(range(ndim)))

    pipe.set_outputs(
        in_mask, fg_pixel1, fg_pixel2, fg_pixel3, rnd_pixel,
        coin_flip, fg_biased, anchor, crop_shape, out_mask)
    pipe.build()

    for _ in range(3):
        outputs = pipe.run()
        for idx in range(batch_size):
            # Same order as in set_outputs above.
            mask_s = outputs[0].at(idx)
            fg1_s = outputs[1].at(idx).tolist()
            fg2_s = outputs[2].at(idx).tolist()
            fg3_s = outputs[3].at(idx).tolist()
            rnd_s = outputs[4].at(idx).tolist()  # fetched but not checked
            coin_s = outputs[5].at(idx).tolist()
            biased_s = outputs[6].at(idx).tolist()
            anchor_s = outputs[7].at(idx).tolist()
            crop_shape_s = outputs[8].at(idx).tolist()
            out_mask_s = outputs[9].at(idx)

            assert mask_s[tuple(fg1_s)] > 0
            assert mask_s[tuple(fg2_s)] > 0.99
            assert mask_s[tuple(fg3_s)] == 2
            # A foreground pixel is only required when the coin flip asked for one.
            assert mask_s[tuple(biased_s)] > 0 or not coin_s

            for d in range(ndim):
                assert 0 <= anchor_s[d] and anchor_s[d] + crop_shape_s[d] <= mask_s.shape[d]
            assert out_mask_s.shape == tuple(crop_shape_s)
def random_augmentation(self, probability, augmented, original):
    """Select between ``augmented`` and ``original`` using a coin flip.

    A coin flip with the given ``probability`` is cast to BOOL; the result is
    ``condition * augmented + (condition ^ True) * original``.

    Args:
        probability: Probability passed to ``fn.coin_flip``.
        augmented: Value multiplied by the condition.
        original: Value multiplied by the negated condition.

    Returns:
        The sum of the two masked terms.
    """
    coin = fn.coin_flip(probability=probability)
    use_augmented = fn.cast(coin, dtype=types.DALIDataType.BOOL)
    use_original = use_augmented ^ True  # logical negation of the flag

    augmented_term = use_augmented * augmented
    original_term = use_original * original
    return augmented_term + original_term
def create_coco_pipeline(default_boxes, args, seed):
    """Build the COCO training pipeline with box encoding.

    Stages, in order: COCO reader, random bbox crop, sliced image decoding,
    resize to 300x300, HSV and brightness/contrast jitter, horizontal box
    flip and crop-mirror-normalize sharing one coin flip, box encoding
    against ``default_boxes``, and a move of boxes and labels to the GPU.

    The shard id / count come from ``torch.distributed``; if querying it
    raises ``RuntimeError`` a single shard (id 0 of 1) is used.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` supplies the anchors.
        args: Namespace providing ``batch_size``, ``num_workers``,
            ``local_rank``, ``train_coco_root``, ``train_annotate`` and ``fp16``.
        seed: Pipeline seed.

    Returns:
        The configured ``Pipeline`` (``build()`` is not called here).
    """
    pipeline = Pipeline(args.batch_size, args.num_workers, args.local_rank, seed=seed)

    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id, num_shards = 0, 1

    with pipeline:
        images, bboxes, labels = fn.coco_reader(
            file_root=args.train_coco_root,
            annotations_file=args.train_annotate,
            skip_empty=True,
            shard_id=shard_id,
            num_shards=num_shards,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
            name="Reader",
        )

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
            bboxes,
            labels,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50,
        )

        images = fn.image_decoder_slice(
            images, crop_begin, crop_size, device="mixed", output_type=types.RGB)

        # One coin flip shared by the bbox flip and the image mirror below.
        flip_coin = fn.coin_flip(probability=0.5)

        images = fn.resize(
            images,
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
        )

        # Random colour-jitter parameters.
        saturation = fn.uniform(range=[0.5, 1.5])
        contrast = fn.uniform(range=[0.5, 1.5])
        brightness = fn.uniform(range=[0.875, 1.125])
        hue = fn.uniform(range=[-0.5, 0.5])

        # use float to avoid clipping and quantizing the intermediate result
        images = fn.hsv(images, dtype=types.FLOAT, hue=hue, saturation=saturation)

        images = fn.brightness_contrast(
            images,
            contrast_center=128,  # input is in float, but in 0..255 range
            dtype=types.UINT8,
            brightness=brightness,
            contrast=contrast,
        )

        if args.fp16:
            dtype = types.FLOAT16
        else:
            dtype = types.FLOAT

        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)

        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        images = fn.crop_mirror_normalize(
            images,
            crop=(300, 300),
            mean=channel_mean,
            std=channel_std,
            mirror=flip_coin,
            dtype=dtype,
            output_layout="CHW",
            pad_output=False,
        )

        bboxes, labels = fn.box_encoder(
            bboxes, labels, criteria=0.5, anchors=default_boxes.as_ltrb_list())

        labels = labels.gpu()
        bboxes = bboxes.gpu()

        pipeline.set_outputs(images, bboxes, labels)

    return pipeline
def build_pipes(device, dim, batch_size, channel_first, mode, interp, dtype, w_input, h_input, d_input, use_size_arg, use_size_input, use_roi):
    """Build the DALI resize pipeline and the PIL-based reference pipeline.

    The DALI pipeline reads 2D images from ``db_2d_folder`` (``dim == 2``) or
    takes 3D volumes from ``random_3d_loader`` otherwise, optionally moves
    them to the GPU and/or transposes them to channel-first, and resizes them
    with ``resize_dali``. Its outputs are the input images, the resized
    images, the RoI bounds (when ``use_roi``) and whichever of ``d``, ``h``,
    ``w``, ``size`` were set, in that order.

    The reference pipeline receives images, sizes and RoI bounds through
    named external sources and resizes them with ``resize_PIL``.

    Args:
        device: ``"cpu"`` keeps the data on the CPU; any other value moves it
            to the GPU via ``.gpu()``.
        dim: Number of spatial dimensions (2 or 3).
        batch_size: Batch size for both pipelines.
        channel_first: Whether to transpose the input to channel-first layout.
        mode, interp, dtype: Forwarded to ``resize_dali`` / ``resize_PIL``.
        w_input, h_input, d_input: When true, the respective extent is a
            random per-sample input (zeroed with probability 0.2); otherwise a
            fixed constant is used. Only consulted when ``use_size_arg`` is
            false.
        use_size_arg: Use a single ``size`` argument instead of separate
            width/height/depth.
        use_size_input: With ``use_size_arg``, make ``size`` a random
            per-sample input instead of a fixed list.
        use_roi: Generate a random region of interest.

    Returns:
        Tuple ``(dali_pipe, pil_pipe)``; both pipelines are already built.
    """
    dali_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)
    with dali_pipe:
        if dim == 2:
            files, _ = dali.fn.caffe_reader(path=db_2d_folder, random_shuffle=True)
            images_cpu = dali.fn.image_decoder(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size), layout="DHWC")

        images_hwc = images_cpu if device == "cpu" else images_cpu.gpu()

        if channel_first:
            images = dali.fn.transpose(images_hwc,
                                       perm=[3, 0, 1, 2] if dim == 3 else [2, 0, 1],
                                       transpose_layout=True)
        else:
            images = images_hwc

        roi_start = None
        roi_end = None
        w = None
        h = None
        d = None
        size = None

        minibatch_size = 2 if dim == 3 else 8

        if use_roi:
            # Calculate absolute RoI
            in_size = fn.slice(fn.shapes(images_cpu),
                               types.Constant(0, dtype=types.FLOAT, device="cpu"),
                               types.Constant(dim, dtype=types.FLOAT, device="cpu"),
                               axes=[0],
                               normalized_shape=False)
            roi_start = fn.uniform(range=(0, 0.4), shape=[dim]) * in_size
            roi_end = fn.uniform(range=(0.6, 1.0), shape=[dim]) * in_size

        size_range = (10, 200) if dim == 3 else (10, 1000)

        if use_size_arg:
            if use_size_input:
                # The INT32 cast of a value drawn from [0.8, 1.9) serves as a
                # multiplicative mask on the randomly drawn size.
                mask = fn.cast(fn.uniform(range=(0.8, 1.9), shape=[dim]), dtype=types.INT32)
                size = fn.uniform(range=size_range, shape=[dim]) * mask
            else:
                size = [300, 400] if dim == 2 else [80, 100, 120]

            resized = resize_dali(images, channel_first, dtype, interp, mode,
                                  size, None, None, None, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))
        else:
            if w_input:
                has_w = fn.coin_flip(probability=0.8)
                w = fn.uniform(range=size_range) * has_w
            else:
                w = 320  # some fixed value

            if h_input:
                has_h = fn.coin_flip(probability=0.8)
                h = fn.uniform(range=size_range) * has_h
            else:
                h = 240  # some other fixed value

            if dim >= 3:
                if d_input:
                    has_d = fn.coin_flip(probability=0.8)
                    d = fn.uniform(range=size_range) * has_d
                else:
                    d = 31  # some other fixed value

            resized = resize_dali(images, channel_first, dtype, interp, mode,
                                  None, w, h, d, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))

        outputs = [images, resized]
        # Compare against None explicitly: roi_start / roi_end are pipeline
        # data nodes when set, and those must not be truth-tested.
        if roi_start is not None and roi_end is not None:
            outputs += [roi_start, roi_end]

        for x in (d, h, w, size):
            if x is not None:
                if isinstance(x, _DataNode):
                    outputs.append(x)
                else:
                    # Fixed Python values are wrapped so they can be returned
                    # as pipeline outputs.
                    outputs.append(types.Constant(np.array(x, dtype=np.float32)))

        dali_pipe.set_outputs(*outputs)

    pil_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0,
                        exec_async=False, exec_pipelined=False)
    with pil_pipe:
        images = fn.external_source(name="images", layout=layout_str(dim, channel_first))
        sizes = fn.external_source(name="size")
        roi_start = fn.external_source(name="roi_start")
        roi_end = fn.external_source(name="roi_end")
        resized = resize_PIL(dim, channel_first, dtype, interp, images, sizes, roi_start, roi_end)
        resized = fn.reshape(resized, layout=layout_str(dim, channel_first))
        pil_pipe.set_outputs(resized)

    dali_pipe.build()
    pil_pipe.build()

    return dali_pipe, pil_pipe
def create_dali_pipeline(batch_size, num_threads, device_id, data_dir, crop, size, shard_id, num_shards, dali_cpu=False, is_training=True):
    """Build an image-classification pipeline for training or evaluation.

    Training: random-crop decoding, resize to ``crop`` x ``crop``, and a
    random mirror (coin flip with probability 0.5). Evaluation: plain
    decoding, resize with ``size`` in ``"not_smaller"`` mode, no mirror.
    Both paths end with crop-mirror-normalize on the GPU and INT64 labels
    on the GPU.

    Args:
        batch_size, num_threads, device_id: Positional ``Pipeline`` arguments;
            ``device_id`` is also added to 12 to form the seed.
        data_dir: Root directory for ``fn.file_reader``.
        crop: Output crop size (also the resize target when training).
        size: Resize target used for evaluation.
        shard_id, num_shards: Reader sharding.
        dali_cpu: When true, decode and resize on the CPU; otherwise use the
            ``'mixed'`` decoder and ``'gpu'`` resize.
        is_training: Selects the training or evaluation path and whether the
            reader shuffles.

    Returns:
        The configured ``Pipeline`` (``build()`` is not called here).
    """
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=12 + device_id)

    if dali_cpu:
        dali_device = decoder_device = 'cpu'
        # Padding values are only non-zero for the 'mixed' decoder.
        device_memory_padding = host_memory_padding = 0
    else:
        dali_device, decoder_device = 'gpu', 'mixed'
        device_memory_padding = 211025920
        host_memory_padding = 140544512

    with pipeline:
        images, labels = fn.file_reader(
            file_root=data_dir,
            shard_id=shard_id,
            num_shards=num_shards,
            random_shuffle=is_training,
            pad_last_batch=True,
            name="Reader",
        )

        if is_training:
            images = fn.image_decoder_random_crop(
                images,
                device=decoder_device,
                output_type=types.RGB,
                device_memory_padding=device_memory_padding,
                host_memory_padding=host_memory_padding,
                random_aspect_ratio=[0.8, 1.25],
                random_area=[0.1, 1.0],
                num_attempts=100,
            )
            images = fn.resize(
                images,
                device=dali_device,
                resize_x=crop,
                resize_y=crop,
                interp_type=types.INTERP_TRIANGULAR,
            )
            mirror = fn.coin_flip(probability=0.5)
        else:
            images = fn.image_decoder(
                images, device=decoder_device, output_type=types.RGB)
            images = fn.resize(
                images,
                device=dali_device,
                size=size,
                mode="not_smaller",
                interp_type=types.INTERP_TRIANGULAR,
            )
            mirror = False

        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        images = fn.crop_mirror_normalize(
            images.gpu(),
            dtype=types.FLOAT,
            output_layout="CHW",
            crop=(crop, crop),
            mean=channel_mean,
            std=channel_std,
            mirror=mirror,
        )

        labels = fn.cast(labels.gpu(), dtype=types.INT64)
        pipeline.set_outputs(images, labels)

    return pipeline