def __init__(self, root_dir, batch_size, num_threads, device_id, use_shift_scale=False,
             num_shards=None, shard_id=None):
    """Create the reader, decoder and GPU augmentation operators.

    Args:
        root_dir: Directory handed to ``ops.FileReader`` as ``file_root``.
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        use_shift_scale: When truthy, an additional ``RandomResizedCrop``
            operator is created and stored as ``self.shift_scale``.
        num_shards: Forwarded unchanged to ``ops.FileReader``.
        shard_id: Forwarded unchanged to ``ops.FileReader``.
    """
    # The parent pipeline always gets a fixed seed of 12.
    super().__init__(batch_size, num_threads, device_id, seed=12)

    # Random parameter generators.
    self.random_angle = ops.Uniform(range=(0, 360.0))
    self.random = ops.Uniform(range=(0.5, 1.5))
    self.random_coin = ops.CoinFlip()

    # Shuffled file reader, optionally sharded.
    self.input = ops.FileReader(file_root=root_dir,
                                random_shuffle=True,
                                num_shards=num_shards,
                                shard_id=shard_id)

    # Decoding and geometric operators.
    self.decode = ops.ImageDecoder(device='mixed')
    self.rotate = ops.Rotate(device='gpu', interp_type=types.INTERP_LINEAR)
    self.crop = ops.Crop(device='gpu', crop=(224, 224))

    self.use_shift_scale = use_shift_scale
    if self.use_shift_scale:
        self.shift_scale = ops.RandomResizedCrop(device='gpu',
                                                 size=(224, 224),
                                                 interp_type=types.INTERP_LINEAR,
                                                 random_area=(0.3, 1.0))

    # Flip and colour operators.
    self.flip = ops.Flip(device='gpu')
    self.color_twist = ops.ColorTwist(device='gpu')
def __init__(self, DATA_PATH, input_height, batch_size, copies, stage, num_threads,
             device_id, seed=1729):
    """Create the operators of the SimCLR augmentation pipeline.

    Args:
        DATA_PATH: Image root; used both for ``ImageFolder`` (to count the
            samples) and as ``file_root`` of the DALI file reader.
        input_height: Output edge length used by the crop/resize operators
            and to derive the candidate blur window sizes.
        batch_size: Forwarded to the parent constructor and used as the
            ``minibatch_size`` of the random resized crop.
        copies: Stored as ``self.copies``.
        stage: Stored as ``self.stage``.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        seed: Seed for the parent pipeline and for the file reader.
    """
    super(SimCLRTransform, self).__init__(batch_size, num_threads, device_id, seed=seed)

    # Lets the PyTorch compatibility helper find the length of the dataset.
    self.num_samples = len(ImageFolder(DATA_PATH))

    self.copies = copies
    self.input_height = input_height
    self.stage = stage

    self.input = ops.FileReader(file_root=DATA_PATH, random_shuffle=True, seed=seed)
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
    self.to_int32_cpu = ops.Cast(dtype=types.INT32, device="cpu")

    # Random parameter generators.
    self.coin = ops.random.CoinFlip(probability=0.5)
    self.uniform = ops.random.Uniform(range=[0.6, 0.9])
    # Candidate blur windows: odd values below 10% of the input height.
    blur_windows = [float(w) for w in range(1, int(0.1 * self.input_height), 2)]
    self.blur_amt = ops.random.Uniform(values=blur_windows)
    self.angles = ops.random.Uniform(range=[0, 360])

    self.cast = ops.Cast(dtype=types.FLOAT, device='gpu')
    self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.crop = ops.RandomResizedCrop(size=self.input_height,
                                      minibatch_size=batch_size,
                                      random_area=[0.75, 1.0],
                                      device="gpu")
    self.resize = ops.Resize(resize_x=self.input_height,
                             resize_y=self.input_height,
                             device="gpu")

    # The random generators are invoked here, so their outputs are bound to
    # the operators as argument inputs at construction time.
    flip_vertical = self.coin()
    flip_horizontal = self.coin()
    self.flip = ops.Flip(vertical=flip_vertical, horizontal=flip_horizontal, device="gpu")

    brightness = self.uniform()
    contrast = self.uniform()
    hue = self.uniform()
    saturation = self.uniform()
    self.colorjit_gray = ops.ColorTwist(brightness=brightness,
                                        contrast=contrast,
                                        hue=hue,
                                        saturation=saturation,
                                        device="gpu")

    blur_window = self.to_int32_cpu(self.blur_amt())
    self.blur = ops.GaussianBlur(window_size=blur_window, device="gpu")

    self.rotate = ops.Rotate(angle=self.angles(),
                             keep_size=True,
                             interp_type=types.DALIInterpType.INTERP_LINEAR,
                             device="gpu")
    self.swapaxes = ops.Transpose(perm=[2, 0, 1], device="gpu")
def __init__(self, batch_size, num_threads, device_id, eii):
    """Create the operators of a pipeline fed from external sources.

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        eii: Iterable kept as ``self.external_data``; an iterator over it is
            stored as ``self.iterator``.
    """
    super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id,
                                                 seed=12)

    # Three independent external inputs.
    self.input = ops.ExternalSource()
    self.id_label = ops.ExternalSource()
    self.boxes = ops.ExternalSource()

    side = 256
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.resize = ops.Resize(device="gpu", resize_x=side, resize_y=side)
    self.twist = ops.ColorTwist(device="gpu")
    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(side, side),
                                             mean=[0.0] * 3,
                                             std=[255.0] * 3,
                                             mirror=0,
                                             output_dtype=types.FLOAT,
                                             output_layout=types.NCHW,
                                             image_type=types.RGB)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.external_data = eii
    self.iterator = iter(self.external_data)
def __init__(self, data_root, data_list, sampler, crop, colorjitter=None):
    """Create the reader, decoder and augmentation operators.

    Args:
        data_root: Passed to ``ops.McReader`` as ``file_root``.
        data_list: Passed to ``ops.McReader`` as ``file_list``.
        sampler: Iterable of indices; materialised into a list and passed
            as ``sampler_index``.
        crop: Edge length of the square output crop.
        colorjitter: Optional indexable with four entries used as the
            half-widths of the brightness, contrast, saturation and hue
            ranges (in that order). ``None`` disables colour jitter.
    """
    super(ImageNetTrainPipeV2, self).__init__()

    self.mc_input = ops.McReader(file_root=data_root,
                                 file_list=data_list,
                                 sampler_index=list(sampler))
    self.colorjitter = colorjitter
    dali_device = "gpu"

    # This padding sets the size of the internal nvJPEG buffers to be able to
    # handle all images from full-sized ImageNet without additional
    # reallocations.
    self.decode = ops.ImageDecoder(device="mixed",
                                   output_type=types.RGB,
                                   device_memory_padding=211025920,
                                   host_memory_padding=140544512)
    self.res = ops.RandomResizedCrop(device=dali_device, size=(crop, crop))
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)

    if self.colorjitter is not None:
        self.colorjit = ops.ColorTwist(device="gpu")
        # Brightness/contrast/saturation are sampled around 1.0, hue around 0.
        self.rng_brightness = ops.Uniform(
            range=(1.0 - self.colorjitter[0], 1.0 + self.colorjitter[0]))
        self.rng_contrast = ops.Uniform(
            range=(1.0 - self.colorjitter[1], 1.0 + self.colorjitter[1]))
        self.rng_saturation = ops.Uniform(
            range=(1.0 - self.colorjitter[2], 1.0 + self.colorjitter[2]))
        self.rng_hue = ops.Uniform(range=(-self.colorjitter[3], self.colorjitter[3]))
def __init__(self, DATA_PATH, input_height, batch_size, num_threads, device_id):
    """Create the operators of the SimCLR training augmentation pipeline.

    Args:
        DATA_PATH: Passed to ``ops.FileReader`` as ``file_root``.
        input_height: Output edge length of the random resized crop; also
            used to derive the Gaussian blur window (about 10% of it).
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
    """
    super(SimCLRTrainDataTransform, self).__init__(batch_size, num_threads, device_id,
                                                   seed=12)

    self.COPIES = 3
    self.input_height = input_height

    self.input = ops.FileReader(file_root=DATA_PATH, random_shuffle=True, seed=12)

    self.coin = ops.CoinFlip(probability=0.5)
    # Jitter factors are sampled from [0.7, 1.3].
    self.uniform = ops.Uniform(range=[0.7, 1.3])

    # The 'mixed' decoder takes the encoded input and outputs the decoded
    # image for the GPU operators below.
    self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)
    self.crop = ops.RandomResizedCrop(size=self.input_height, device="gpu")
    self.flip = ops.Flip(vertical=self.coin(), horizontal=self.coin(), device="gpu")
    self.colorjit_gray = ops.ColorTwist(brightness=self.uniform(),
                                        contrast=self.uniform(),
                                        hue=self.uniform(),
                                        saturation=self.uniform(),
                                        device="gpu")

    # GaussianBlur needs an odd kernel window. int(0.1 * input_height) is
    # even for many common sizes (e.g. 224 -> 22), so force the lowest bit
    # on: even values are bumped to the next odd value, odd ones are kept.
    blur_window = int(0.1 * self.input_height) | 1
    self.blur = ops.GaussianBlur(window_size=blur_window, device="gpu")

    self.swapaxes = ops.Transpose(perm=[2, 0, 1], device="gpu")
    self.to_int64 = ops.Cast(dtype=types.INT64, device="gpu")
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, seed=12,
             local_rank=0, world_size=1, spos_pre=False):
    """Create the operators of the hybrid (mixed/GPU) training pipeline.

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor and added to ``seed``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Output size of the random resized crop.
        seed: Base seed; the pipeline is seeded with ``seed + device_id``.
        local_rank: Shard index for the file reader.
        world_size: Number of shards for the file reader.
        spos_pre: When truthy, decode to BGR, resize with linear
            interpolation and normalize with ``mean=0``/``std=1``; otherwise
            use RGB, triangular interpolation and ImageNet statistics.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=seed + device_id)

    # All spos_pre-dependent settings are picked in one place.
    if spos_pre:
        color_space_type = types.BGR
        resize_interp = types.INTERP_LINEAR
        norm_mean = 0.
        norm_std = 1.
    else:
        color_space_type = types.RGB
        resize_interp = types.INTERP_TRIANGULAR
        norm_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        norm_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=local_rank,
                                num_shards=world_size,
                                random_shuffle=True)
    self.decode = ops.ImageDecoder(device="mixed", output_type=color_space_type)
    self.res = ops.RandomResizedCrop(device="gpu", size=crop, interp_type=resize_interp)
    self.twist = ops.ColorTwist(device="gpu")
    self.jitter_rng = ops.Uniform(range=[0.6, 1.4])
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        image_type=color_space_type,
                                        mean=norm_mean,
                                        std=norm_std)
    self.coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, root, list_path, crop, shard_id,
             num_shards, coji=False, dali_cpu=False):
    """Create the operators of the file-list based training pipeline.

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; the pipeline seed is
            ``12 + device_id``.
        root: Passed to ``ops.FileReader`` as ``file_root``.
        list_path: Passed to ``ops.FileReader`` as ``file_list``.
        crop: Edge length used for the resize and the normalization crop.
        shard_id: Shard index for the file reader.
        num_shards: Number of shards for the file reader.
        coji: When truthy, colour-jitter operators are created as well.
        dali_cpu: When truthy, run decode/resize/normalize on the CPU.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)

    self.read = ops.FileReader(file_root=root,
                               file_list=list_path,
                               shard_id=shard_id,
                               num_shards=num_shards,
                               random_shuffle=True,
                               initial_fill=1024)

    # Let the user decide which pipeline works.
    if dali_cpu:
        dali_device = 'cpu'
        decoder_device = 'cpu'
        # No nvJPEG buffer padding is requested for the CPU decoder.
        device_memory_padding = 0
        host_memory_padding = 0
    else:
        dali_device = 'gpu'
        decoder_device = 'mixed'
        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        device_memory_padding = 211025920
        host_memory_padding = 140544512

    self.decode = ops.ImageDecoderRandomCrop(device=decoder_device,
                                             output_type=types.RGB,
                                             device_memory_padding=device_memory_padding,
                                             host_memory_padding=host_memory_padding,
                                             random_aspect_ratio=[0.75, 1.33333333],
                                             random_area=[0.08, 1.0],
                                             num_attempts=100)
    self.resize = ops.Resize(device=dali_device,
                             resize_x=crop,
                             resize_y=crop,
                             interp_type=types.INTERP_TRIANGULAR)
    self.cmnp = ops.CropMirrorNormalize(device=dali_device,
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[x * 255 for x in IMAGENET_MEAN],
                                        std=[x * 255 for x in IMAGENET_STD])
    self.coin = ops.CoinFlip(probability=0.5)

    self.coji = coji
    if self.coji:
        self.twist = ops.ColorTwist(device=dali_device)
        # Each factor is sampled from 1.0 +/- 0.4.
        self.brightness_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
        self.contrast_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
        self.saturation_rng = ops.Uniform(range=[1.0 - 0.4, 1.0 + 0.4])
def __new__(cls, **kwargs):
    """Create a ``ColorTwist`` operator for the current device type.

    The device is taken from ``context.get_device_type()``; every keyword
    argument is forwarded unchanged to ``ops.ColorTwist``.

    Returns
    -------
    nvidia.dali.ops.ColorTwist
        The operator.
    """
    # NOTE(review): the previous docstring called this a ``Brightness``
    # operator, but the object built here is ``ops.ColorTwist``.
    device_type = context.get_device_type()
    return ops.ColorTwist(device=device_type, **kwargs)
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
    """Create the operators of the ImageNet-style training pipeline.

    Sharding is read from the module-level ``args`` object
    (``args.local_rank`` / ``args.world_size``).

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; the pipeline seed is
            ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Edge length used for the resize and the normalization crop.
        dali_cpu: When truthy, decode and resize on the CPU. The flip,
            colour and normalization operators are created on the GPU
            regardless.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id)

    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=args.local_rank,
                                num_shards=args.world_size,
                                random_shuffle=True)

    # Let the user decide which pipeline works best for the RN version run.
    if dali_cpu:
        dali_device = 'cpu'
        decoder_device = 'cpu'
        device_memory_padding = 0
        host_memory_padding = 0
    else:
        dali_device = 'gpu'
        decoder_device = 'mixed'
        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        device_memory_padding = 211025920
        host_memory_padding = 140544512

    # Randomly crop while decoding (crop sampling), then resize.
    self.decode = ops.ImageDecoderRandomCrop(device=decoder_device,
                                             output_type=types.RGB,
                                             device_memory_padding=device_memory_padding,
                                             host_memory_padding=host_memory_padding,
                                             random_aspect_ratio=[0.75, 4/3.0],
                                             random_area=[0.08, 1.0],
                                             num_attempts=10)
    self.res = ops.Resize(device=dali_device,
                          resize_x=crop,
                          resize_y=crop,
                          interp_type=types.INTERP_TRIANGULAR)

    self.vert_flip = ops.Flip(device='gpu', horizontal=0)
    self.vert_coin = ops.CoinFlip(probability=0.075)

    # Color jitter, see https://www.gitmemory.com/ruiyuanlu and
    # https://github.com/NVIDIA/DALI/issues/336
    self.twist = ops.ColorTwist(device="gpu")
    self.rng1 = ops.Uniform(range=[0.6, 1.4])
    # factor=0.4 -> +/- 0.4 * 255 (factor=0.2 would be range=[-51, 51]).
    self.rng2 = ops.Uniform(range=[-102, 102])

    self.flip = ops.Flip(device="gpu", vertical=1, horizontal=0)
    self.color_jitter = ops.ColorTwist(device="gpu",
                                       hue=0.2,
                                       brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4)
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

    # This is torch.transform.RandomHorizontalFlip.
    self.mirrorcoin = ops.CoinFlip(probability=0.5)
    self.uniform = ops.Uniform(range=(0.0, 1.0))

    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, alpha=[0.5, 1.5], delta=[0.875, 1.125], gamma=[-0.5, 0.5]):
    """Create the random generators and the GPU ``ColorTwist`` operator.

    Args:
        alpha: Accepted but not read by this constructor.
        delta: Accepted but not read by this constructor.
        gamma: Range used for the contrast, saturation and hue generators.
    """
    # NOTE(review): ``alpha`` and ``delta`` are unused, and brightness uses a
    # hard-coded range - confirm whether contrast/saturation/brightness were
    # meant to draw from them.
    jitter_range = gamma
    self.contrast = ops.Uniform(range=jitter_range)
    self.brightness = ops.Uniform(range=[-0.125, 0.125])
    self.saturation = ops.Uniform(range=jitter_range)
    self.hue = ops.Uniform(range=jitter_range)

    self.ct = ops.ColorTwist(device="gpu")
    self.toss_a_coin = ops.CoinFlip(probability=0.5)
def __init__(self, batch_size, num_threads, device_id, crop, colorjitter=None,
             dali_cpu=False):
    """Create the operators of an externally-fed ImageNet training pipeline.

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; the pipeline seed is
            ``12 + device_id``.
        crop: Edge length of the square output.
        colorjitter: Optional indexable with four entries used as the
            half-widths of the brightness, contrast, saturation and hue
            ranges (in that order). ``None`` disables colour jitter.
        dali_cpu: When truthy, decode with a host random-crop decoder and
            resize; otherwise decode with nvJPEG and random-resized-crop on
            the GPU.
    """
    super(ImageNetTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                            seed=12 + device_id)

    self.data_input = ops.ExternalSource()
    self.label_input = ops.ExternalSource()
    self.colorjitter = colorjitter

    # Let the user decide which pipeline works best for the RN version run.
    if not dali_cpu:
        dali_device = "gpu"
        # This padding sets the size of the internal nvJPEG buffers to be
        # able to handle all images from full-sized ImageNet without
        # additional reallocations.
        self.decode = ops.nvJPEGDecoder(device="mixed",
                                        output_type=types.RGB,
                                        device_memory_padding=211025920,
                                        host_memory_padding=140544512)
        self.res = ops.RandomResizedCrop(device=dali_device, size=(crop, crop))
    else:
        dali_device = "cpu"
        self.decode = ops.HostDecoderRandomCrop(device=dali_device, output_type=types.RGB)
        self.res = ops.Resize(resize_x=crop, resize_y=crop)

    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(crop, crop),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)

    if self.colorjitter is not None:
        self.colorjit = ops.ColorTwist(device="gpu")
        # Brightness/contrast/saturation are sampled around 1.0, hue around 0.
        self.rng_brightness = ops.Uniform(
            range=(1.0 - self.colorjitter[0], 1.0 + self.colorjitter[0]))
        self.rng_contrast = ops.Uniform(
            range=(1.0 - self.colorjitter[1], 1.0 + self.colorjitter[1]))
        self.rng_saturation = ops.Uniform(
            range=(1.0 - self.colorjitter[2], 1.0 + self.colorjitter[2]))
        self.rng_hue = ops.Uniform(range=(-self.colorjitter[3], self.colorjitter[3]))
def __init__(self, batch_size, device_id, file_root, annotations_file, num_gpus,
             output_fp16=False, output_nhwc=False, pad_output=False, num_threads=1,
             seed=15):
    """Create the operators of the COCO detection pipeline.

    Args:
        batch_size: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        file_root: Passed to ``ops.COCOReader``.
        annotations_file: Passed to ``ops.COCOReader``.
        num_gpus: Number of reader shards.
        output_fp16: Emit ``FLOAT16`` instead of ``FLOAT`` from normalization.
        output_nhwc: Emit ``NHWC`` instead of ``NCHW`` from normalization.
        pad_output: Forwarded to ``ops.CropMirrorNormalize``.
        num_threads: Forwarded to the parent constructor.
        seed: Forwarded to the parent constructor.
    """
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed)

    # The shard index is the distributed rank when a process group exists.
    shard_id = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0

    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=shard_id,
                                num_shards=num_gpus,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=True,
                                skip_empty=True)
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques
    self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

    if output_fp16:
        output_dtype = types.FLOAT16
    else:
        output_dtype = types.FLOAT
    if output_nhwc:
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW

    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(300, 300),
                                             mean=[0.0, 0.0, 0.0],
                                             std=[255.0, 255.0, 255.0],
                                             mirror=0,
                                             output_dtype=output_dtype,
                                             output_layout=output_layout,
                                             pad_output=pad_output)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, batch_size, seed, data_iterator, kind="new", num_threads=1,
             device_id=0):
    """Create the external source, random generators and colour-twist operator.

    Args:
        batch_size: Forwarded to the parent constructor.
        seed: Seed for the pipeline and for every random generator.
        data_iterator: Source handed to ``ops.ExternalSource``.
        kind: ``"new"`` selects ``ops.ColorTwist`` on the GPU, ``"old"``
            selects ``ops.OldColorTwist`` on the GPU, and any other value
            selects ``ops.OldColorTwist`` on the CPU.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
    """
    super(ColorTwistPipeline, self).__init__(batch_size, num_threads, device_id, seed=seed)

    self.input = ops.ExternalSource(source=data_iterator)

    # All four generators share the pipeline seed.
    self.hue = ops.random.Uniform(range=[-20., 20.], seed=seed)
    self.sat = ops.random.Uniform(range=[0., 1.], seed=seed)
    self.bri = ops.random.Uniform(range=[0., 2.], seed=seed)
    self.con = ops.random.Uniform(range=[0., 2.], seed=seed)

    self.kind = kind
    if kind == "new":
        twist_op = ops.ColorTwist(device="gpu")
    elif kind == "old":
        twist_op = ops.OldColorTwist(device="gpu")
    else:
        twist_op = ops.OldColorTwist(device="cpu")
    self.color_twist = twist_op
def __init__(self, file_root, annotations_file, batch_size=1, device_id=0, num_threads=4,
             local_rank=0, world_size=1):
    """Create the COCO training operators and build the pipeline.

    Args:
        file_root: Passed to ``ops.COCOReader``.
        annotations_file: Passed to ``ops.COCOReader``.
        batch_size: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; the pipeline seed is
            ``42 + device_id``.
        num_threads: Forwarded to the parent constructor.
        local_rank: Shard index for the reader.
        world_size: Number of shards for the reader.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=42 + device_id)

    self.reader = ops.COCOReader(file_root=file_root,
                                 annotations_file=annotations_file,
                                 skip_empty=True,
                                 shard_id=local_rank,
                                 num_shards=world_size,
                                 ratio=True,
                                 ltrb=True,
                                 shuffle_after_epoch=True)

    # Box-aware random crop and matching box flip run on the CPU.
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.3, 1.0],
                                   ltrb=True,
                                   allow_no_crop=True,
                                   num_attempts=50)
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)

    self.roi_decode = ops.ImageDecoderSlice(device="mixed")
    self.resize = ops.Resize(device="gpu",
                             resize_x=300,
                             resize_y=300,
                             min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
    self.twist = ops.ColorTwist(device="gpu")
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        mean=[104., 117., 123.],
                                        std=[1., 1., 1.],
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        pad_output=False)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
    self.coin = ops.CoinFlip(probability=0.5)

    # The pipeline is built as part of construction.
    self.build()
def __init__(self, batch_size, file_root, annotations_file, default_boxes, seed,
             device_id=0, num_threads=4):
    """Create the operators of the COCO pipeline with box encoding.

    Args:
        batch_size: Forwarded to the parent constructor.
        file_root: Passed to ``ops.COCOReader``.
        annotations_file: Passed to ``ops.COCOReader``.
        default_boxes: Object whose ``as_ltrb_list()`` result is used as the
            anchors of the box encoder.
        seed: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
    """
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed)

    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=True)
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    # Augmentation techniques
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.8, 1.0],
                                   ltrb=True)
    self.slice = ops.Slice(device="gpu")
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.485 * 255., 0.456 * 255., 0.406 * 255.],
        std=[0.229 * 255., 0.224 * 255., 0.225 * 255.])

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    # Image flip on the GPU with the matching box flip on the CPU.
    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    anchors = default_boxes.as_ltrb_list()
    self.box_encoder = ops.BoxEncoder(device="cpu", criteria=0.5, anchors=anchors)
def __init__(self, db_prefix, input_shape, batch_size, data_params, for_train,
             num_threads, device_id, num_shards):
    """Create the operators of the RecordIO-based pipeline.

    Args:
        db_prefix: Path prefix; ``.rec`` and ``.idx`` are appended to locate
            the record file and its index.
        input_shape: Indexable where ``[0]`` is the channel count (used to
            replicate scalar mean/std) and ``[1]``, ``[2]`` form the crop.
        batch_size: Forwarded to the parent constructor.
        data_params: Mapping read for ``mean`` and ``std``, for ``shuffle``
            when training, and for the training-only augmentation
            settings ``max_rotate_angle``, ``contrast``, ``brightness``,
            ``saturation``, ``hue`` and ``rand_mirror``.
        for_train: When truthy, augmentation operators are created and the
            reader shuffles according to ``data_params['shuffle']``.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; also used as the
            reader shard index and added to the base seed of 12.
        num_shards: Number of reader shards.
    """
    super(HybridRecPipe, self).__init__(batch_size, num_threads, device_id,
                                        seed=12 + device_id,
                                        prefetch_queue_depth=2)
    self.for_train = for_train

    self.input = ops.MXNetReader(
        path=[db_prefix + '.rec'],
        index_path=[db_prefix + '.idx'],
        random_shuffle=data_params['shuffle'] if for_train else False,
        shard_id=device_id,
        num_shards=num_shards)
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)

    # Scalar statistics are replicated once per channel; lists pass through.
    channels = input_shape[0]
    mean = data_params['mean']
    if not isinstance(mean, list):
        mean = [mean] * channels
    std = data_params['std']
    if not isinstance(std, list):
        std = [std] * channels

    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(input_shape[1], input_shape[2]),
                                        mean=mean,
                                        std=std)

    if self.for_train:
        self.rotate = ops.Rotate(device="gpu", interp_type=types.INTERP_LINEAR)
        self.color = ops.ColorTwist(device='gpu')

        max_angle = float(data_params['max_rotate_angle'])
        self.rng_angle = ops.Uniform(range=(-max_angle, +max_angle))

        # Contrast, brightness and saturation are multiplicative factors, so
        # they are sampled symmetrically around 1.0 (no change).
        self.rng_contrast = ops.Uniform(
            range=(1.0 - data_params['contrast'], 1.0 + data_params['contrast']))
        self.rng_brightness = ops.Uniform(
            range=(1.0 - data_params['brightness'], 1.0 + data_params['brightness']))
        self.rng_saturation = ops.Uniform(
            range=(1.0 - data_params['saturation'], 1.0 + data_params['saturation']))

        # Hue is an additive angle whose neutral value is 0, not 1. Centring
        # the range on 1.0 shifted the hue of every image, even with
        # data_params['hue'] == 0, so the range is centred on 0 instead.
        self.rng_hue = ops.Uniform(range=(-data_params['hue'], data_params['hue']))

        # A constant 0 stands in for the coin when mirroring is disabled.
        self.coin = ops.CoinFlip(probability=0.5) if data_params['rand_mirror'] else 0
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False,
             local_rank=0, world_size=1):
    """Create the operators of the training pipeline with PCA lighting.

    The parent pipeline is created with ``exec_async=False`` and
    ``exec_pipelined=False``.

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; the pipeline seed is
            ``12 + device_id``.
        data_dir: Passed to ``ops.FileReader`` as ``file_root``.
        crop: Output size of the random resized crop.
        dali_cpu: Accepted but not read; every operator here targets the
            GPU (or the mixed decoder).
        local_rank: Shard index for the file reader.
        world_size: Number of shards for the file reader.
    """
    super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id,
                                          seed=12 + device_id,
                                          exec_async=False,
                                          exec_pipelined=False)
    dali_device = "gpu"

    # The Lighting callable is wrapped in a GPU PythonFunction operator below.
    self.pca_lighting = Lighting(alphastd=0.1)

    self.input = ops.FileReader(file_root=data_dir,
                                shard_id=local_rank,
                                num_shards=world_size,
                                random_shuffle=True)
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.lighting = ops.PythonFunction(device="gpu", function=self.pca_lighting)
    self.jitter = ops.ColorTwist(device="gpu",
                                 brightness=0.4,
                                 contrast=0.4,
                                 saturation=0.4,
                                 hue=0.0)
    self.res = ops.RandomResizedCrop(device="gpu", size=crop, random_area=[0.08, 1.25])
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    self.coin = ops.CoinFlip(probability=0.5)

    print('DALI "{0}" variant'.format(dali_device))
def __init__(self, db_prefix, for_train, input_size, batch_size, num_threads, device_id,
             num_gpus):
    """Create the operators of the RecordIO-based ImageNet pipeline.

    Args:
        db_prefix: Path prefix; ``.rec`` and ``.idx`` are appended to locate
            the record file and its index.
        for_train: When truthy, the reader shuffles, decoding uses a random
            crop and colour/mirror generators are created; otherwise a plain
            image decoder is used.
        input_size: Edge length used for the resize and normalization crop.
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor; also used as the
            reader shard index and added to the base seed of 12.
        num_gpus: Number of reader shards.
    """
    super(HybridRecPipe, self).__init__(batch_size, num_threads, device_id,
                                        seed=12 + device_id,
                                        prefetch_queue_depth=2)
    self.for_train = for_train

    self.input = ops.MXNetReader(path=[db_prefix + ".rec"],
                                 index_path=[db_prefix + ".idx"],
                                 random_shuffle=for_train,
                                 shard_id=device_id,
                                 num_shards=num_gpus)
    self.resize = ops.Resize(device="gpu", resize_x=input_size, resize_y=input_size)
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        crop=(input_size, input_size),
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

    if not self.for_train:
        # Evaluation: plain decode, no augmentation operators.
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    else:
        self.decode = ops.ImageDecoderRandomCrop(device="mixed",
                                                 output_type=types.RGB,
                                                 random_aspect_ratio=[3 / 4, 4 / 3],
                                                 random_area=[0.08, 1.0],
                                                 num_attempts=100)
        self.color = ops.ColorTwist(device='gpu')
        self.rng_brightness = ops.Uniform(range=(0.6, 1.4))
        self.rng_contrast = ops.Uniform(range=(0.6, 1.4))
        self.rng_saturation = ops.Uniform(range=(0.6, 1.4))
        self.mirror_coin = ops.CoinFlip(probability=0.5)
def __init__(self, path, batch_size, num_threads, device_id, seed, output_fp16=False,
             output_nhwc=False, pad_output=False):
    """Create the operators of a simple file-based pipeline.

    Args:
        path: Passed to ``ops.FileReader`` as ``file_root``.
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        seed: Forwarded to the parent constructor.
        output_fp16: Emit ``FLOAT16`` instead of ``FLOAT`` from normalization.
        output_nhwc: Emit ``NHWC`` instead of ``NCHW`` from normalization.
        pad_output: Forwarded to ``ops.CropMirrorNormalize``.
    """
    super(SimplePipeline, self).__init__(batch_size, num_threads, device_id, seed=seed)

    self.input = ops.FileReader(file_root=path)
    self.decode = ops.ImageDecoder(device='cpu', output_type=types.RGB)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

    if output_fp16:
        output_dtype = types.FLOAT16
    else:
        output_dtype = types.FLOAT
    if output_nhwc:
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW

    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(300, 300),
                                             mean=[0.0, 0.0, 0.0],
                                             std=[255.0, 255.0, 255.0],
                                             mirror=0,
                                             output_dtype=output_dtype,
                                             output_layout=output_layout,
                                             pad_output=pad_output)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, args, device_id, file_root, annotations_file):
    """Create CPU and GPU variants of every detection-pipeline operator.

    Args:
        args: Object providing ``batch_size``, ``num_workers``, ``prefetch``,
            ``seed`` and ``num_gpus``.
        device_id: Forwarded to the parent constructor and used as the
            reader shard index.
        file_root: Passed to ``ops.COCOReader``.
        annotations_file: Passed to ``ops.COCOReader``.
    """
    # Keyword arguments are required here: positionally, the fourth and
    # fifth parameters of ``Pipeline.__init__`` are ``seed`` and
    # ``exec_pipelined``, so passing ``args.prefetch, args.seed`` by position
    # used the prefetch depth as the seed and the seed as a boolean flag.
    super(DetectionPipeline, self).__init__(batch_size=args.batch_size,
                                            num_threads=args.num_workers,
                                            device_id=device_id,
                                            prefetch_queue_depth=args.prefetch,
                                            seed=args.seed)

    # Reading COCO dataset
    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=device_id,
                                num_shards=args.num_gpus,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=True)

    # Decoders: plain and slicing, host and nvJPEG.
    self.decode_cpu = ops.HostDecoder(device="cpu", output_type=types.RGB)
    self.decode_crop = ops.HostDecoderSlice(device="cpu", output_type=types.RGB)
    self.decode_gpu = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
    self.decode_gpu_crop = ops.nvJPEGDecoderSlice(device="mixed", output_type=types.RGB)

    # Random crops, both seeded from args.seed.
    self.ssd_crop = ops.SSDRandomCrop(device="cpu", num_attempts=1, seed=args.seed)
    self.random_bbox_crop = ops.RandomBBoxCrop(device="cpu",
                                               aspect_ratio=[0.5, 2.0],
                                               thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                               scaling=[0.3, 1.0],
                                               ltrb=True,
                                               seed=args.seed)

    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    self.resize_cpu = ops.Resize(device="cpu",
                                 resize_x=300,
                                 resize_y=300,
                                 min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
    self.resize_gpu = ops.Resize(device="gpu",
                                 resize_x=300,
                                 resize_y=300,
                                 min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    crop_size = (300, 300)
    self.normalize_cpu = ops.CropMirrorNormalize(device="cpu",
                                                 crop=crop_size,
                                                 mean=mean,
                                                 std=std,
                                                 mirror=0,
                                                 output_dtype=types.FLOAT)
    self.normalize_gpu = ops.CropMirrorNormalize(device="gpu",
                                                 crop=crop_size,
                                                 mean=mean,
                                                 std=std,
                                                 mirror=0,
                                                 output_dtype=types.FLOAT)

    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")

    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    # Box encoders share one anchor set; the *_offsets variants additionally
    # pass offset/scale/stds.
    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(device="cpu", criteria=0.5,
                                          anchors=default_boxes)
    self.box_encoder_gpu = ops.BoxEncoder(device="gpu", criteria=0.5,
                                          anchors=default_boxes)
    self.box_encoder_cpu_offsets = ops.BoxEncoder(device="cpu",
                                                  criteria=0.5,
                                                  offset=True,
                                                  scale=2,
                                                  stds=[0.1, 0.1, 0.2, 0.2],
                                                  anchors=default_boxes)
    self.box_encoder_gpu_offsets = ops.BoxEncoder(device="gpu",
                                                  criteria=0.5,
                                                  offset=True,
                                                  scale=2,
                                                  stds=[0.1, 0.1, 0.2, 0.2],
                                                  anchors=default_boxes)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, default_boxes, args, seed):
    """Create the operators of the COCO training pipeline.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` result is used as the
            anchors of the box encoder.
        args: Object providing ``batch_size``, ``local_rank``,
            ``num_workers``, ``train_coco_root``, ``train_annotate`` and
            ``fp16``.
        seed: Forwarded to the parent constructor.
    """
    super(COCOPipeline, self).__init__(batch_size=args.batch_size,
                                       device_id=args.local_rank,
                                       num_threads=args.num_workers,
                                       seed=seed)

    # Query the process-group state explicitly instead of catching
    # RuntimeError from get_rank(): this falls back to a single shard
    # whatever exception type get_rank() would raise without a process group.
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    else:
        shard_id = 0
        num_shards = 1

    self.input = ops.COCOReader(file_root=args.train_coco_root,
                                annotations_file=args.train_annotate,
                                skip_empty=True,
                                shard_id=shard_id,
                                num_shards=num_shards,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=False,
                                shuffle_after_epoch=True)
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.3, 1.0],
                                   ltrb=True,
                                   allow_no_crop=True,
                                   num_attempts=1)
    self.slice = ops.Slice(device="cpu")
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="cpu",
                             resize_x=300,
                             resize_y=300,
                             min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    output_dtype = types.FLOAT16 if args.fp16 else types.FLOAT
    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(300, 300),
                                             mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                             std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
                                             mirror=0,
                                             output_dtype=output_dtype,
                                             output_layout=types.NCHW,
                                             pad_output=False)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="cpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    self.box_encoder = ops.BoxEncoder(device="cpu",
                                      criteria=0.5,
                                      anchors=default_boxes.as_ltrb_list())
def __init__(self, default_boxes, root, annFile, batch_size, mean, std, local_rank,
             num_workers, seed):
    """Create the operators of the COCO pipeline with paste (expand) support.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` result is used as the
            anchors of the box encoder.
        root: Passed to ``ops.COCOReader`` as ``file_root``.
        annFile: Passed to ``ops.COCOReader`` as ``annotations_file``.
        batch_size: Forwarded to the parent constructor.
        mean: Normalization mean; also used (as a tuple) for the paste fill
            value.
        std: Normalization standard deviation.
        local_rank: Forwarded to the parent constructor as ``device_id``.
        num_workers: Forwarded to the parent constructor as ``num_threads``.
        seed: Forwarded to the parent constructor.
    """
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=local_rank,
                                       num_threads=num_workers,
                                       seed=seed)

    # The reader always uses a single shard here; the torch.distributed
    # lookup that used to select the shard is disabled.
    shard_id = 0
    num_shards = 1

    self.input = ops.COCOReader(file_root=root,
                                annotations_file=annFile,
                                skip_empty=True,
                                shard_id=shard_id,
                                num_shards=num_shards,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=False,
                                shuffle_after_epoch=True)
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    # Augmentation techniques
    # Expand: paste ratio sampled from [1, 2], position from [0, 1].
    self.paste_ratio = ops.Uniform(range=[1, 2])
    self.paste_pos = ops.Uniform(range=[0, 1])
    self.paste = ops.Paste(device="gpu", fill_value=tuple(mean))
    self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)

    # Random crop
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.3, 1.0],
                                   ltrb=True,
                                   allow_no_crop=True,
                                   num_attempts=50)
    self.slice = ops.Slice(device="gpu")
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu",
                             resize_x=320,
                             resize_y=320,
                             min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(320, 320),
                                             mean=mean,
                                             std=std,
                                             mirror=0,
                                             output_dtype=types.FLOAT,
                                             output_layout=types.NCHW,
                                             pad_output=False)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    anchors = default_boxes.as_ltrb_list()
    self.box_encoder = ops.BoxEncoder(device="cpu", criteria=0.5, anchors=anchors)
def __init__(self, batch_size, num_threads, device_id, image_size, tfrecord_path,
             index_path, config, shard_id=0):
    """Create the input, decoding and augmentation operators.

    Args:
        batch_size: Forwarded to the parent constructor.
        num_threads: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        image_size: Target size for resize/crop operators.
        tfrecord_path: Forwarded to ``self._input``.
        index_path: Forwarded to ``self._input``.
        config: Object providing ``zoom_scale`` and the min/max bounds of
            the rotate, crop, hue, contrast, saturation and brightness
            generators.
        shard_id: Forwarded to ``self._input``.
    """
    super(CommonPipeline, self).__init__(batch_size, num_threads, device_id)

    self.image_size = image_size
    self.input = self._input(tfrecord_path, index_path, shard_id=shard_id)

    # The nvJPEG decoder throws an error for some unsupported JPEGs. Until
    # this is fixed, the host decoder (which runs on the CPU) is used.
    self.decode = ops.HostDecoder(device="cpu", output_type=types.RGB)

    self.resize = ops.Resize(device="gpu",
                             image_type=types.RGB,
                             interp_type=types.INTERP_LINEAR,
                             resize_x=image_size,
                             resize_y=image_size)
    # Same resize, scaled up by the configured zoom factor.
    self.resize_large = ops.Resize(device="gpu",
                                   image_type=types.RGB,
                                   interp_type=types.INTERP_LINEAR,
                                   resize_x=image_size * config.zoom_scale,
                                   resize_y=image_size * config.zoom_scale)

    self.color_twist = ops.ColorTwist(device="gpu")
    self.crop_mirror_normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=image_size,
        output_dtype=types.FLOAT,
        image_type=types.RGB,
        output_layout=types.DALITensorLayout.NHWC,
        mean=122.5,
        std=255.0)
    self.crop = ops.Crop(device="gpu", crop=image_size)
    self.cast = ops.Cast(device="gpu", dtype=types.DALIDataType.INT64)
    self.rotate = ops.Rotate(device="gpu", fill_value=0)
    self.flip = ops.Flip(device="gpu")

    # Random parameter generators, bounded by the configuration.
    self.coin = ops.CoinFlip(probability=0.5)
    self.rotate_rng = ops.Uniform(range=(config.rotate_angle_min, config.rotate_angle_max))
    self.crop_x_rng = ops.Uniform(range=(0.0, config.crop_x_max))
    self.crop_y_rng = ops.Uniform(range=(0.0, config.crop_y_max))
    self.hue_rng = ops.Uniform(range=(config.hue_min, config.hue_max))
    self.contrast_rng = ops.Uniform(range=(config.contrast_min, config.contrast_max))
    self.saturation_rng = ops.Uniform(range=(config.saturation_min, config.saturation_max))
    self.brightness_rng = ops.Uniform(range=(config.brightness_min, config.brightness_max))

    self.iter = 0
def __init__(self, args, device_id, file_root, annotations_file):
    """Create CPU and GPU variants of the detection-pipeline operators.

    Args:
        args: Object read for ``batch_size``, ``num_workers``, ``prefetch``,
            ``seed`` and ``num_gpus``.
        device_id: Forwarded to the base pipeline and also used as the
            reader's ``shard_id``.
        file_root: Forwarded to the COCO reader.
        annotations_file: Forwarded to the COCO reader.
    """
    super(DetectionPipeline, self).__init__(
        batch_size=args.batch_size,
        num_threads=args.num_workers,
        device_id=device_id,
        prefetch_queue_depth=args.prefetch,
        seed=args.seed,
    )

    # --- COCO reader ---
    self.input = ops.readers.COCO(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=device_id,
        num_shards=args.num_gpus,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
    )

    # --- Decoders (plain and fused decode+slice, host and mixed) ---
    self.decode_cpu = ops.decoders.Image(
        device="cpu", output_type=types.RGB)
    self.decode_crop = ops.decoders.ImageSlice(
        device="cpu", output_type=types.RGB)
    self.decode_gpu = ops.decoders.Image(
        device="mixed", output_type=types.RGB, hw_decoder_load=0)
    self.decode_gpu_crop = ops.decoders.ImageSlice(
        device="mixed", output_type=types.RGB, hw_decoder_load=0)

    # --- Cropping ---
    self.ssd_crop = ops.SSDRandomCrop(
        device="cpu", num_attempts=1, seed=args.seed)
    self.random_bbox_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        seed=args.seed,
    )
    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    # --- Resize to 300x300 ---
    resize_kwargs = dict(
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )
    self.resize_cpu = ops.Resize(device="cpu", **resize_kwargs)
    self.resize_gpu = ops.Resize(device="gpu", **resize_kwargs)

    # --- Normalization; the constants are scaled by 255 ---
    mean = [channel * 255 for channel in (0.485, 0.456, 0.406)]
    std = [channel * 255 for channel in (0.229, 0.224, 0.225)]
    crop_size = (300, 300)
    normalize_kwargs = dict(
        crop=crop_size,
        mean=mean,
        std=std,
        mirror=0,
        dtype=types.FLOAT,
    )
    self.normalize_cpu = ops.CropMirrorNormalize(
        device="cpu", **normalize_kwargs)
    self.normalize_gpu = ops.CropMirrorNormalize(
        device="gpu", **normalize_kwargs)

    # --- Color augmentation ---
    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")
    self.hsv_cpu = ops.Hsv(device="cpu", dtype=types.FLOAT)
    self.hsv_gpu = ops.Hsv(device="gpu", dtype=types.FLOAT)
    self.bc_cpu = ops.BrightnessContrast(
        device="cpu", dtype=types.UINT8, contrast_center=128)
    self.bc_gpu = ops.BrightnessContrast(
        device="gpu", dtype=types.UINT8, contrast_center=128)

    # --- Flipping of images and bounding boxes ---
    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    # --- Box encoders, with and without offset encoding ---
    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(
        device="cpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_gpu = ops.BoxEncoder(
        device="gpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_cpu_offsets = ops.BoxEncoder(
        device="cpu",
        criteria=0.5,
        offset=True,
        scale=2,
        stds=[0.1, 0.1, 0.2, 0.2],
        anchors=default_boxes,
    )
    self.box_encoder_gpu_offsets = ops.BoxEncoder(
        device="gpu",
        criteria=0.5,
        offset=True,
        scale=2,
        stds=[0.1, 0.1, 0.2, 0.2],
        anchors=default_boxes,
    )

    # --- Random number generators ---
    self.saturation_rng = ops.random.Uniform(range=[0.8, 1.2])
    self.contrast_rng = ops.random.Uniform(range=[0.5, 1.5])
    # NOTE(review): the attribute name is spelled "brighness_rng"; it is kept
    # unchanged because code outside this method may reference it.
    self.brighness_rng = ops.random.Uniform(range=[0.875, 1.125])
    self.hue_rng = ops.random.Uniform(range=[-45, 45])
def __init__(self, data, batch_size, image_size, split, silent, num_threads,
             device_id, data_loader, color_space, shuffle=False):
    """Create external-source inputs and the decode/augment/normalize ops.

    Args:
        data: Iterable data source; ``data.shuffle()`` is called when
            ``shuffle`` is true and ``iter(data)`` is stored as
            ``self.sourceIterator``.
        batch_size: Forwarded to the base pipeline.
        image_size: Indexable pair; element 0 is used as ``resize_x`` and
            element 1 as ``resize_y``; the whole value is passed as the
            ``size`` of the random resized crop.
        split: Stored as ``self.split``.
        silent: Not used by this constructor.
        num_threads: Forwarded to the base pipeline.
        device_id: Forwarded to the base pipeline.
        data_loader: ``"cpu"`` only prints an error and creates no image
            operators; ``"dali_cpu"`` places the operators on the CPU; any
            other value places them on the GPU.
        color_space: One of ``'RGB'``, ``'YCbCr'``, ``'L'``, ``'BGR'``.
        shuffle: Whether to shuffle ``data`` before iterating it.

    Raises:
        ValueError: If ``color_space`` is not supported and image operators
            have to be created (``data_loader`` is not ``"cpu"``).
    """
    super(ExternalSourcePipeline, self).__init__(batch_size, num_threads,
                                                 device_id)
    self.split = split
    self.color_space = color_space
    self.data_loader = data_loader

    if shuffle:
        data.shuffle()
    self.sourceIterator = iter(data)

    # One external source per input stream.
    self.rowBatch = ops.ExternalSource()
    self.imFaceBatch = ops.ExternalSource()
    self.imEyeLBatch = ops.ExternalSource()
    self.imEyeRBatch = ops.ExternalSource()
    self.imFaceGridBatch = ops.ExternalSource()
    self.gazeBatch = ops.ExternalSource()
    self.indexBatch = ops.ExternalSource()

    # Normalization constants are only defined for RGB; the other color
    # spaces pass mean=None / std=None to the normalize operator.
    mean = None
    std = None
    if color_space == 'RGB':
        output_type = types.RGB
        mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    elif color_space == 'YCbCr':
        output_type = types.YCbCr
    elif color_space == 'L':
        output_type = types.GRAY
    elif color_space == 'BGR':
        output_type = types.BGR
    else:
        print("Unsupported color_space:", color_space)
        # Keep the name bound so the failure below is an explicit ValueError
        # instead of an UnboundLocalError raised from the decoder line.
        output_type = None

    # Variation range for Saturation, Contrast, Brightness and Hue
    self.dSaturation = ops.Uniform(range=[0.9, 1.1])
    self.dContrast = ops.Uniform(range=[0.9, 1.1])
    self.dBright = ops.Uniform(range=[0.9, 1.1])
    self.dHue = ops.Uniform(range=[-0.1, 0.1])

    if data_loader == "cpu":
        print("Error: cpu data loader shouldn't be handled by DALI")
    else:
        if output_type is None:
            raise ValueError(
                "Unsupported color_space: {!r}".format(color_space))

        # ---------- Decoding Operations --------- #
        # ImageDecoder in mixed mode doesn't support YCbCr
        # Ref: https://github.com/NVIDIA/DALI/pull/582/files
        self.decode = ops.ImageDecoder(device="cpu", output_type=output_type)

        # ---------- Augmentation Operations --------- #
        # Execute the rest of the operations on the target device based upon
        # the mode.
        device = "cpu" if data_loader == "dali_cpu" else "gpu"
        self.resize_big = ops.Resize(device=device, resize_x=240,
                                     resize_y=240)
        # Deprecated; replace with HSV and ops.BrightnessContrast soon.
        self.color_jitter = ops.ColorTwist(device=device,
                                           image_type=output_type)
        # NOTE(review): the original comment describes a random area of
        # 0.93-1.0 (cropping randomly from an image of size between 224 and
        # 240), but the range passed below is the fixed [0.93, 0.93] --
        # confirm which is intended.
        self.crop = ops.RandomResizedCrop(device=device,
                                          random_area=[0.93, 0.93],
                                          size=image_size)

        # ---------- Normalization Operations --------- #
        self.resize = ops.Resize(device=device, resize_x=image_size[0],
                                 resize_y=image_size[1])
        self.norm = ops.CropMirrorNormalize(device=device,
                                            output_dtype=types.FLOAT,
                                            output_layout='CHW',
                                            image_type=output_type,
                                            mean=mean,
                                            std=std)
def __init__(self, batch_size, device_id, file_root, annotations_file,
             num_gpus, output_fp16=False, output_nhwc=False,
             pad_output=False, num_threads=1, seed=15):
    """Create the reader, decoder and a collection of augmentation operators.

    Args:
        batch_size: Forwarded to the base pipeline.
        device_id: Forwarded to the base pipeline.
        file_root: Forwarded to the COCO reader.
        annotations_file: Forwarded to the COCO reader.
        num_gpus: Forwarded to the COCO reader as ``num_shards``.
        output_fp16: Selects ``types.FLOAT16`` instead of ``types.FLOAT`` as
            the normalize output type.
        output_nhwc: Selects ``types.NHWC`` instead of ``types.NCHW`` as the
            normalize output layout.
        pad_output: Forwarded to the normalize operator.
        num_threads: Forwarded to the base pipeline.
        seed: Forwarded to the base pipeline.
    """
    super(COCOPipeline, self).__init__(
        batch_size=batch_size,
        device_id=device_id,
        num_threads=num_threads,
        seed=seed,
    )

    # The shard index is the distributed rank when a process group exists,
    # otherwise 0.
    shard_id = (torch.distributed.get_rank()
                if torch.distributed.is_initialized() else 0)

    self.input = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=shard_id,
        num_shards=num_gpus,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
        skip_empty=True,
    )
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # --- Augmentation operators ---
    self.rotate = ops.Rotate(
        device="gpu",
        angle=30,
        interp_type=types.INTERP_LINEAR,
        fill_value=0,
    )
    self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

    # Image flip plus two coin flips (probability 0.1 each).
    self.flip = ops.Flip(device='gpu')
    self.coin_flip_v = ops.CoinFlip(probability=0.1)
    self.coin_flip_h = ops.CoinFlip(probability=0.1)

    # Bounding-box flip.
    self.bbflip = ops.BbFlip(device='gpu', ltrb=True)

    # Paste: image operator, its random position / ratio, and the box variant.
    self.paste = ops.Paste(device='gpu', fill_value=0)
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, 2))
    self.bbpaste = ops.BBoxPaste(device='cpu', ltrb=True)

    # Random bounding-box crop (stored under the name "prospective_crop").
    self.prospective_crop = ops.RandomBBoxCrop(
        device='cpu',
        aspect_ratio=[0.5, 2.0],
        thresholds=[0.1, 0.3, 0.5],
        scaling=[0.8, 1.0],
        ltrb=True,
    )

    # Slice operator (the original note places it after the crop above).
    self.slice = ops.Slice(device='gpu')

    # Image effects. Disabled alternatives kept for reference:
    # self.contrast = ops.BrightnessContrast(device="gpu", brightness=0.5, contrast=1.5)
    # self.hsv = ops.Hsv(device="gpu", hue=45., saturation=0.2)
    self.water = ops.Water(device='gpu')
    self.sphere = ops.Sphere(device='gpu')
    self.warpaffine = ops.WarpAffine(
        device="gpu",
        matrix=[1.0, 0.8, 0.0, 0.0, 1.2, 0.0],
        interp_type=types.INTERP_LINEAR,
    )

    # --- Normalization, with caller-selected dtype and layout ---
    if output_fp16:
        output_dtype = types.FLOAT16
    else:
        output_dtype = types.FLOAT
    if output_nhwc:
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.0, 0.0, 0.0],
        std=[255.0, 255.0, 255.0],
        mirror=0,
        output_dtype=output_dtype,
        output_layout=output_layout,
        pad_output=pad_output,
    )

    # --- Random number generators ---
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, batch_size, device_id, file_root, annotations_file,
             num_gpus, output_fp16=False, output_nhwc=False,
             pad_output=False, num_threads=1, seed=15, dali_cache=-1,
             dali_async=True, use_nvjpeg=False, use_roi=False):
    """Create the operators of a 300x300 COCO pipeline.

    Args:
        batch_size: Forwarded to the base pipeline.
        device_id: Forwarded to the base pipeline.
        file_root: Forwarded to the COCO reader.
        annotations_file: Forwarded to the COCO reader.
        num_gpus: Forwarded to the COCO reader as ``num_shards``.
        output_fp16: Selects ``types.FLOAT16`` instead of ``types.FLOAT`` as
            the normalize output type.
        output_nhwc: Selects ``types.NHWC`` instead of ``types.NCHW`` as the
            normalize output layout.
        pad_output: Forwarded to the normalize operator.
        num_threads: Forwarded to the base pipeline.
        seed: Forwarded to the base pipeline.
        dali_cache: When positive, the reader uses ``random_shuffle`` and
            ``stick_to_shard`` and the non-ROI nvJPEG decoder is created with
            a cache of ``dali_cache * 1024``; otherwise the reader uses
            ``shuffle_after_epoch``.
        dali_async: Forwarded to the base pipeline as both ``exec_pipelined``
            and ``exec_async``.
        use_nvjpeg: Use the mixed-device nvJPEG decoders with
            ``RandomBBoxCrop``; otherwise the host decoder with
            ``SSDRandomCrop``.
        use_roi: With ``use_nvjpeg``, use the fused decode-and-slice decoder
            (``self.slice`` is then ``None``).
    """
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed,
                                       exec_pipelined=dali_async,
                                       exec_async=dali_async)
    self.use_roi = use_roi
    self.use_nvjpeg = use_nvjpeg

    try:
        shard_id = torch.distributed.get_rank()
    # The exception raised when no process group has been initialized is not
    # the same across PyTorch releases (RuntimeError, AssertionError and
    # ValueError have all been used), so fall back to shard 0 for any of them.
    except (RuntimeError, AssertionError, ValueError):
        shard_id = 0

    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=shard_id,
                                num_shards=num_gpus,
                                ratio=True,
                                ltrb=True,
                                skip_empty=True,
                                random_shuffle=(dali_cache > 0),
                                stick_to_shard=(dali_cache > 0),
                                shuffle_after_epoch=(dali_cache <= 0))

    if use_nvjpeg:
        if use_roi:
            self.decode = ops.nvJPEGDecoderSlice(device="mixed",
                                                 output_type=types.RGB)
            # Slicing is handled in the ROI decoder.
            self.slice = None
        else:
            if dali_cache > 0:
                self.decode = ops.nvJPEGDecoder(device="mixed",
                                                output_type=types.RGB,
                                                cache_size=dali_cache * 1024,
                                                cache_type="threshold",
                                                cache_threshold=10000)
            else:
                self.decode = ops.nvJPEGDecoder(device="mixed",
                                                output_type=types.RGB)
            self.slice = ops.Slice(device="gpu")
        self.crop = ops.RandomBBoxCrop(
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            ltrb=True,
            allow_no_crop=True,
            num_attempts=1)
    else:
        self.decode = ops.HostDecoder(device="cpu", output_type=types.RGB)
        # Slicing is handled in the cropper.
        self.slice = None
        self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)

    # Augmentation techniques (in addition to random crop)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(
        device="gpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
    output_layout = types.NHWC if output_nhwc else types.NCHW

    # Normalization constants are scaled by 255.
    mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
    std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(300, 300),
                                             mean=mean_val,
                                             std=std_val,
                                             mirror=0,
                                             output_dtype=output_dtype,
                                             output_layout=output_layout,
                                             pad_output=pad_output)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, batch_size, device_id, file_root, meta_files_path,
             annotations_file, num_gpus, anchors_ltrb_list,
             output_fp16=False, output_nhwc=False, pad_output=False,
             num_threads=1, seed=15, dali_cache=-1, dali_async=True,
             use_nvjpeg=False):
    """Create the operators of a 300x300 COCO pipeline with box encoding.

    Attribute prefixes follow the device each operator is created for:
    ``c_`` for cpu, ``g_`` for gpu, ``m_`` for the decoder (mixed or cpu).

    Args:
        batch_size: Forwarded to the base pipeline.
        device_id: Forwarded to the base pipeline.
        file_root: Forwarded to the COCO reader.
        meta_files_path: When ``None`` the reader is configured from
            ``annotations_file``; otherwise this value is forwarded to the
            reader as ``meta_files_path`` and ``annotations_file`` is unused.
        annotations_file: Forwarded to the COCO reader when
            ``meta_files_path`` is ``None``.
        num_gpus: Forwarded to the COCO reader as ``num_shards``.
        anchors_ltrb_list: Forwarded to the box encoder as ``anchors``.
        output_fp16: Selects ``types.FLOAT16`` instead of ``types.FLOAT`` as
            the normalize output type.
        output_nhwc: Selects ``types.NHWC`` instead of ``types.NCHW`` as the
            normalize output layout.
        pad_output: Forwarded to the normalize operator.
        num_threads: Forwarded to the base pipeline.
        seed: Forwarded to the base pipeline.
        dali_cache: When positive, the reader uses ``random_shuffle`` and
            ``stick_to_shard``, and (together with ``use_nvjpeg``) a caching
            decoder plus a separate slice operator are created; otherwise the
            reader uses ``shuffle_after_epoch``.
        dali_async: Forwarded to the base pipeline as both ``exec_pipelined``
            and ``exec_async``.
        use_nvjpeg: Selects the ``'mixed'`` decoder device instead of
            ``'cpu'``.
    """
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed,
                                       exec_pipelined=dali_async,
                                       exec_async=dali_async)
    self.use_nvjpeg = use_nvjpeg

    try:
        shard_id = torch.distributed.get_rank()
    # Note: <= 19.05 was a RuntimeError, 19.06 is now throwing AssertionError.
    # ValueError is accepted as well because the exception type used for an
    # uninitialized process group has changed between PyTorch releases.
    except (RuntimeError, AssertionError, ValueError):
        shard_id = 0

    if meta_files_path is None:
        self.c_input = ops.COCOReader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_gpus,
            ratio=True,
            ltrb=True,
            skip_empty=True,
            random_shuffle=(dali_cache > 0),
            stick_to_shard=(dali_cache > 0),
            lazy_init=True,
            shuffle_after_epoch=(dali_cache <= 0))
    else:
        self.c_input = ops.COCOReader(
            file_root=file_root,
            meta_files_path=meta_files_path,
            shard_id=shard_id,
            num_shards=num_gpus,
            random_shuffle=(dali_cache > 0),
            stick_to_shard=(dali_cache > 0),
            lazy_init=True,
            shuffle_after_epoch=(dali_cache <= 0))

    self.c_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=1)

    decoder_device = 'mixed' if use_nvjpeg else 'cpu'
    # Fused decode and slice. This is "region-of-interest" (roi) decoding.
    self.m_decode = ops.ImageDecoderSlice(device=decoder_device,
                                          output_type=types.RGB)
    self.g_slice = None

    # Special case for using dali decode caching: the caching decoder can't
    # be fused with slicing (because we need to slice the decoded image
    # differently every epoch), so we need to unfuse decode and slice:
    if dali_cache > 0 and use_nvjpeg:
        self.m_decode = ops.ImageDecoder(device='mixed',
                                         output_type=types.RGB,
                                         cache_size=dali_cache * 1024,
                                         cache_type="threshold",
                                         cache_threshold=10000)
        self.g_slice = ops.Slice(device="gpu")

    # Augmentation techniques (in addition to random crop)
    self.g_twist = ops.ColorTwist(device="gpu")
    self.g_resize = ops.Resize(
        device="gpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
    output_layout = types.NHWC if output_nhwc else types.NCHW

    # Normalization constants are scaled by 255.
    mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
    std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
    self.g_normalize = ops.CropMirrorNormalize(device="gpu",
                                               crop=(300, 300),
                                               mean=mean_val,
                                               std=std_val,
                                               output_dtype=output_dtype,
                                               output_layout=output_layout,
                                               pad_output=pad_output)

    # Random variables
    self.c_rng1 = ops.Uniform(range=[0.5, 1.5])
    self.c_rng2 = ops.Uniform(range=[0.875, 1.125])
    self.c_rng3 = ops.Uniform(range=[-0.5, 0.5])

    flip_probability = 0.5
    self.c_flip_coin = ops.CoinFlip(probability=flip_probability)  # coin_rnd
    self.c_bbflip = ops.BbFlip(device="cpu", ltrb=True)

    self.g_box_encoder = ops.BoxEncoder(device="gpu",
                                        criteria=0.5,
                                        anchors=anchors_ltrb_list,
                                        offset=True,
                                        stds=[0.1, 0.1, 0.2, 0.2],
                                        scale=300)
    self.g_cast = ops.Cast(device="gpu", dtype=types.FLOAT)