def image_decoder_rcrop_pipe(max_batch_size, input_data, device):
    """Assemble a pipeline that decodes and randomly crops externally fed images.

    Args:
        max_batch_size: Passed to ``Pipeline`` as ``batch_size``.
        input_data: Source handed to ``fn.external_source``; it is read
            without cycling (``cycle=False``) on the ``'cpu'`` device.
        device: Device string forwarded to ``fn.image_decoder_random_crop``.

    Returns:
        The ``Pipeline`` with the decoder output registered as its only
        output. The pipeline is not built here.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    raw_images = fn.external_source(source=input_data, cycle=False, device='cpu')
    cropped = fn.image_decoder_random_crop(raw_images, device=device)
    pipeline.set_outputs(cropped)
    return pipeline
def test_image_decoder_random_crop_device():
    """Build a reader + random-crop decoder pipeline and run it three times.

    The pipeline is created with ``device_id=None`` and the decoder is given
    no explicit ``device`` argument. The test makes no assertions on the
    outputs; it only exercises ``build()`` and ``run()``.

    Relies on the module-level names ``batch_size`` and ``images_dir``.
    """
    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    # The reader yields two outputs; only the first (encoded data) is used.
    encoded, _labels = fn.file_reader(file_root=images_dir, shard_id=0, num_shards=1)
    cropped = fn.image_decoder_random_crop(encoded, output_type=types.RGB)
    pipeline.set_outputs(cropped)
    pipeline.build()

    for _iteration in range(3):
        pipeline.run()
def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=False,
                         is_training=True):
    """Define the read / decode / resize / normalize graph for image batches.

    NOTE(review): no ``Pipeline`` object is created here; presumably the
    function is wrapped by a pipeline decorator or called inside a pipeline
    context -- confirm against the caller.

    Args:
        data_dir: Directory given to ``fn.readers.file`` as ``file_root``.
        crop: Edge length of the square output. Used as the resize target in
            training and as the ``crop_mirror_normalize`` crop in both modes.
        size: Resize target for evaluation (``mode="not_smaller"``); unused
            when ``is_training`` is truthy.
        shard_id: Shard index forwarded to the reader.
        num_shards: Total shard count forwarded to the reader.
        dali_cpu: When truthy, decode and resize on ``'cpu'``; otherwise
            decode on ``'mixed'`` and resize on ``'gpu'``.
        is_training: Selects the random-crop / random-mirror branch and is
            passed to the reader as ``random_shuffle``.

    Returns:
        Tuple ``(images, labels)``: the output of ``crop_mirror_normalize``
        and the reader labels after ``.gpu()``.
    """
    images, labels = fn.readers.file(
        file_root=data_dir,
        shard_id=shard_id,
        num_shards=num_shards,
        random_shuffle=is_training,
        pad_last_batch=True,
        name="Reader",
    )

    if dali_cpu:
        dali_device = decoder_device = 'cpu'
        device_memory_padding = host_memory_padding = 0
    else:
        dali_device, decoder_device = 'gpu', 'mixed'
        # NOTE(review): padding hints used only with the 'mixed' decoder;
        # presumably byte counts -- confirm against the decoder documentation.
        device_memory_padding = 211025920
        host_memory_padding = 140544512

    if not is_training:
        # Evaluation: plain decode, resize governed by `size`, never mirrored.
        images = fn.image_decoder(images, device=decoder_device, output_type=types.RGB)
        images = fn.resize(
            images,
            device=dali_device,
            size=size,
            mode="not_smaller",
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = False
    else:
        # Training: fused decode + random crop, resize to crop x crop, and a
        # coin flip (probability 0.5) that drives mirroring.
        images = fn.image_decoder_random_crop(
            images,
            device=decoder_device,
            output_type=types.RGB,
            device_memory_padding=device_memory_padding,
            host_memory_padding=host_memory_padding,
            random_aspect_ratio=[0.8, 1.25],
            random_area=[0.1, 1.0],
            num_attempts=100,
        )
        images = fn.resize(
            images,
            device=dali_device,
            resize_x=crop,
            resize_y=crop,
            interp_type=types.INTERP_TRIANGULAR,
        )
        mirror = fn.random.coin_flip(probability=0.5)

    # Per-channel statistics are written on a 0-1 scale and multiplied by 255.
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    images = fn.crop_mirror_normalize(
        images.gpu(),
        dtype=types.FLOAT,
        output_layout="CHW",
        crop=(crop, crop),
        mean=[value * 255 for value in channel_mean],
        std=[value * 255 for value in channel_std],
        mirror=mirror,
    )
    return images, labels.gpu()
def get_dali_pipeline(tfrec_filenames, tfrec_idx_filenames, height, width, shard_id,
                      num_gpus, dali_cpu=True, training=True):
    """Define a TFRecord-backed decode / resize / normalize graph.

    NOTE(review): no ``Pipeline`` object is created here; presumably the
    function is wrapped by a pipeline decorator or called inside a pipeline
    context -- confirm against the caller.

    Args:
        tfrec_filenames: Passed to ``fn.readers.tfrecord`` as ``path``.
        tfrec_idx_filenames: Passed to the reader as ``index_path``.
        height: Output height; used as ``resize_y`` in training and as the
            first element of the ``crop_mirror_normalize`` crop.
        width: Output width; used as ``resize_x`` in training and as the
            second element of the ``crop_mirror_normalize`` crop.
        shard_id: Shard index forwarded to the reader.
        num_gpus: Forwarded to the reader as ``num_shards``.
        dali_cpu: When truthy, decode and resize on ``"cpu"``; otherwise
            decode on ``"mixed"`` and resize on ``"gpu"``.
        training: Selects the random-crop branch and is passed to the reader
            as ``random_shuffle``.

    Returns:
        Tuple ``(images, labels)``: the output of ``crop_mirror_normalize``
        and the ``'image/class/label'`` feature after ``.gpu()`` with 1
        subtracted.
    """
    feature_spec = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        'image/class/text': tfrec.FixedLenFeature([], tfrec.string, ''),
        'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0),
        'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0),
    }
    inputs = fn.readers.tfrecord(
        path=tfrec_filenames,
        index_path=tfrec_idx_filenames,
        random_shuffle=training,
        shard_id=shard_id,
        num_shards=num_gpus,
        initial_fill=10000,
        features=feature_spec,
    )

    if dali_cpu:
        decode_device = resize_device = "cpu"
    else:
        decode_device, resize_device = "mixed", "gpu"

    if not training:
        images = fn.image_decoder(
            inputs["image/encoded"],
            device=decode_device,
            output_type=types.RGB,
        )
        # Make sure that every image > 224 for CropMirrorNormalize
        images = fn.resize(images, device=resize_device, resize_shorter=256)
    else:
        images = fn.image_decoder_random_crop(
            inputs["image/encoded"],
            device=decode_device,
            output_type=types.RGB,
            random_aspect_ratio=[0.75, 1.25],
            random_area=[0.05, 1.0],
            num_attempts=100,
        )
        images = fn.resize(images, device=resize_device, resize_x=width, resize_y=height)

    # The coin flip drives mirroring in both the training and evaluation paths.
    gpu_images = images.gpu()
    mirror_flag = fn.random.coin_flip()
    images = fn.crop_mirror_normalize(
        gpu_images,
        dtype=types.FLOAT,
        crop=(height, width),
        mean=[123.68, 116.78, 103.94],
        std=[58.4, 57.12, 57.3],
        output_layout="HWC",
        mirror=mirror_flag,
    )

    labels = inputs["image/class/label"].gpu()
    labels -= 1  # Change to 0-based (don't use background class)
    return images, labels