def __init__(self, batch_size, num_threads, device_id, prefetch, seed):
    """Set up the CPU/GPU decode, crop, slice and flip operators.

    Args:
        batch_size: Forwarded positionally to the parent constructor.
        num_threads: Forwarded positionally to the parent constructor.
        device_id: Forwarded positionally to the parent constructor.
        prefetch: Given to the parent constructor as
            ``prefetch_queue_depth``.
        seed: Seed handed to both random-crop operators; it is not passed
            to the parent constructor.
    """
    super(CommonPipeline, self).__init__(
        batch_size, num_threads, device_id, prefetch_queue_depth=prefetch)

    # Host decoders: whole-image and slice variants share their settings.
    host_rgb = {"device": "cpu", "output_type": types.RGB}
    self.decode_cpu = ops.HostDecoder(**host_rgb)
    self.decode_crop = ops.HostDecoderSlice(**host_rgb)

    # Two random-crop generators, both driven by the same seed.
    self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1, seed=seed)
    self.crop2 = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        seed=seed,
    )

    # Slice and flip operators exist once per backend.
    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")
    self.flip_cpu = ops.Flip(device="cpu")
    self.bb_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bb_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)
def __init__(self, batch_size, num_threads, path, training, annotations, world, device_id, mean, std, resize, max_size, stride):
    """Create the COCO reading, decoding, augmentation and padding operators.

    Args:
        batch_size: Forwarded to the parent pipeline constructor.
        num_threads: Forwarded to the parent constructor; also reused as
            its ``prefetch_queue_depth``.
        path: Image root directory given to the COCO reader; stored on
            ``self.path``.
        training: Stored on ``self.training``.
        annotations: Annotation file given to the COCO reader.
        world: Number of shards given to the COCO reader.
        device_id: Forwarded to the parent pipeline constructor.
        mean: Per-channel mean for the normalisation operator.
        std: Per-channel standard deviation for the normalisation operator.
        resize: Lower bound of the random resize range; when a list is
            given its maximum is used.
        max_size: Upper bound of the random resize range and the
            ``resize_longer`` target of the inference resize.
        stride: ``max_size`` is rounded up to a multiple of this value to
            obtain the padded canvas size; stored on ``self.stride``.
    """
    super().__init__(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=device_id,
        prefetch_queue_depth=num_threads,
        seed=42,
    )

    self.path, self.training, self.stride = path, training, stride
    self.iter = 0

    # The shard index comes from the current CUDA device, not `device_id`.
    self.reader = ops.COCOReader(
        annotations_file=annotations,
        file_root=path,
        num_shards=world,
        shard_id=torch.cuda.current_device(),
        ltrb=True,
        ratio=True,
        shuffle_after_epoch=True,
        save_img_ids=True,
    )

    mixed_rgb = {"device": "mixed", "output_type": types.RGB}
    self.decode_train = ops.ImageDecoderSlice(**mixed_rgb)
    self.decode_infer = ops.ImageDecoder(**mixed_rgb)

    self.bbox_crop = ops.RandomBBoxCrop(
        device='cpu',
        ltrb=True,
        scaling=[0.3, 1.0],
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
    )

    self.bbox_flip = ops.BbFlip(device='cpu', ltrb=True)
    self.img_flip = ops.Flip(device='gpu')
    self.coin_flip = ops.CoinFlip(probability=0.5)

    # A list of candidate sizes collapses to its largest entry.
    shortest = max(resize) if isinstance(resize, list) else resize
    self.rand_resize = ops.Uniform(range=[shortest, float(max_size)])

    cubic = types.DALIInterpType.INTERP_CUBIC
    self.resize_train = ops.Resize(
        device='gpu', interp_type=cubic, save_attrs=True)
    self.resize_infer = ops.Resize(
        device='gpu', interp_type=cubic, resize_longer=max_size,
        save_attrs=True)

    # Round max_size up to the next multiple of the stride.
    padded_size = max_size + ((stride - max_size % stride) % stride)

    self.pad = ops.Paste(
        device='gpu', fill_value=0, ratio=1.1,
        min_canvas_size=padded_size, paste_x=0, paste_y=0)
    self.normalize = ops.CropMirrorNormalize(
        device='gpu', mean=mean, std=std,
        crop=padded_size, crop_pos_x=0, crop_pos_y=0)
def __init__(self, file_root, annotations_file, batch_size=1, device_id=0, num_threads=4, local_rank=0, world_size=1):
    """Create every operator of the training pipeline and build it.

    Args:
        file_root: Image root directory for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
        batch_size: Forwarded to the parent pipeline constructor.
        device_id: Forwarded to the parent constructor and added to the
            base seed 42.
        num_threads: Forwarded to the parent pipeline constructor.
        local_rank: Shard index given to the COCO reader.
        world_size: Shard count given to the COCO reader.
    """
    super(HybridTrainPipe, self).__init__(
        batch_size, num_threads, device_id, seed=42 + device_id)

    self.reader = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        skip_empty=True,
        shard_id=local_rank,
        num_shards=world_size,
        ratio=True,
        ltrb=True,
        shuffle_after_epoch=True,
        pad_last_batch=True,
    )

    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=50,
    )
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.roi_decode = ops.ImageDecoderSlice(device="mixed")

    triangular = types.DALIInterpType.INTERP_TRIANGULAR
    self.resize = ops.Resize(
        device="gpu", resize_x=300, resize_y=300, min_filter=triangular)

    # Float output avoids clipping and quantizing the intermediate result.
    self.hsv = ops.Hsv(device="gpu", dtype=types.FLOAT)
    # The input is float but still in the 0..255 range, hence centre 128.
    self.bc = ops.BrightnessContrast(
        device="gpu", contrast_center=128, dtype=types.UINT8)

    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        mean=[104., 117., 123.],
        std=[1., 1., 1.],
        dtype=types.FLOAT,
        output_layout=types.NCHW,
        pad_output=False,
    )

    # Random parameter sources.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
    self.coin = ops.CoinFlip(probability=0.5)

    # The pipeline is built as part of construction.
    self.build()
def __init__(self, target_size, preproc_param, training=False):
    """Create the decode, crop, colour, flip, paste and resize operators.

    Args:
        target_size: Indexable pair; element 0 is used as ``resize_x`` and
            element 1 as ``resize_y``.
        preproc_param: Object exposing ``MEAN``, ``STD``, ``BRI_DELTA``,
            ``HUE_DELTA``, ``MAX_EXPAND_RATIO``, ``CONTRAST_RANGE``,
            ``SATURATION_RANGE``, ``CROP_ASPECT_RATIO``, ``CROP_SCALE`` and
            ``CROP_ATTEMPTS``.
        training: Stored on ``self.training``.
    """
    self.training = training

    # Pull every setting out of the parameter object up front.
    mean = preproc_param.MEAN
    std = preproc_param.STD
    brightness_delta = preproc_param.BRI_DELTA
    hue_delta = preproc_param.HUE_DELTA
    max_expand_ratio = preproc_param.MAX_EXPAND_RATIO
    contrast_range = preproc_param.CONTRAST_RANGE
    saturation_range = preproc_param.SATURATION_RANGE
    crop_aspect_ratio = preproc_param.CROP_ASPECT_RATIO
    crop_scale = preproc_param.CROP_SCALE
    crop_attempts = preproc_param.CROP_ATTEMPTS

    # Decoders.
    mixed_rgb = {"device": "mixed", "output_type": types.RGB}
    self.decode_train = ops.ImageDecoderSlice(**mixed_rgb)
    self.decode_infer = ops.ImageDecoder(**mixed_rgb)

    # SSD-style random crop.
    self.bbox_crop = ops.RandomBBoxCrop(
        device="cpu",
        bbox_layout="xyXY",
        scaling=crop_scale,
        aspect_ratio=crop_aspect_ratio,
        allow_no_crop=True,
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        num_attempts=crop_attempts,
    )

    # Colour twist: random factors plus the operators that consume them.
    self.uniform_con = ops.Uniform(range=contrast_range)
    brightness_step = brightness_delta / 256.0
    self.uniform_bri = ops.Uniform(
        range=(1.0 - brightness_step, 1.0 + brightness_step))
    self.uniform_sat = ops.Uniform(range=saturation_range)
    self.uniform_hue = ops.Uniform(range=(-hue_delta, hue_delta))
    self.hsv = ops.Hsv(device="gpu")
    self.contrast = ops.BrightnessContrast(device="gpu")

    # Horizontal flip.
    self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True)
    self.img_flip = ops.Flip(device="gpu")
    self.coin_flip = ops.CoinFlip(probability=0.5)

    # Paste (canvas expansion); the canvas is filled with the mean.
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, max_expand_ratio))
    self.paste = ops.Paste(device="gpu", fill_value=mean)
    self.bbox_paste = ops.BBoxPaste(device="cpu", ltrb=True)

    # Resize and normalise.
    width, height = target_size[0], target_size[1]
    self.resize = ops.Resize(
        device="gpu",
        interp_type=types.DALIInterpType.INTERP_CUBIC,
        resize_x=width,
        resize_y=height,
        save_attrs=True,
    )
    self.normalize = ops.CropMirrorNormalize(
        device="gpu", mean=mean, std=std)
def __init__(self, p: float = 0.5):
    """Create the operators used for a random vertical flip.

    Args:
        p (float, optional): Probability given to the coin-flip operator
            that decides whether the transformation is applied.
            Defaults to 0.5.
    """
    # Each flip operator is created with its horizontal component set to 0.
    # NOTE(review): presumably the vertical flag is supplied per call from
    # the coin flip -- confirm against the caller.
    no_horizontal = 0

    self.flip_coin_vflip = ops.CoinFlip(probability=p)
    self.image_vflip = ops.Flip(device='gpu', horizontal=no_horizontal)
    self.bbox_vflip = ops.BbFlip(device='cpu', horizontal=no_horizontal)
    self.ldmrks_vflip = ops.CoordFlip(
        layout='xy', device='cpu', flip_x=no_horizontal)
def __init__(self, batch_size, num_threads, path, training, annotations, world, device_id, mean, std, resize, max_size, stride, rotate_augment=False, augment_brightness=0.0, augment_contrast=0.0, augment_hue=0.0, augment_saturation=0.0):
    """Create the COCO pipeline operators, including colour augmentation.

    Args:
        batch_size: Forwarded to the parent pipeline constructor.
        num_threads: Forwarded to the parent constructor; also reused as
            its ``prefetch_queue_depth``.
        path: Image root directory given to the COCO reader.
        training: Stored on ``self.training``.
        annotations: Annotation file given to the COCO reader.
        world: Number of shards given to the COCO reader.
        device_id: Forwarded to the parent pipeline constructor.
        mean: Per-channel mean for the normalisation operator.
        std: Per-channel standard deviation for the normalisation operator.
        resize: Lower bound of the random resize range; when a list is
            given its maximum is used.
        max_size: Upper bound of the random resize range and the
            ``resize_longer`` target of the inference resize.
        stride: ``max_size`` is rounded up to a multiple of this value to
            obtain the padded canvas size.
        rotate_augment: Not supported here; a truthy value raises.
        augment_brightness: Standard deviation of the brightness factor
            distribution (mean 1.0).
        augment_contrast: Standard deviation of the contrast factor
            distribution (mean 1.0).
        augment_hue: Standard deviation of the hue distribution (mean 0.0).
        augment_saturation: Standard deviation of the saturation factor
            distribution (mean 1.0).

    Raises:
        RuntimeWarning: If ``rotate_augment`` is truthy.
    """
    super().__init__(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=device_id,
        prefetch_queue_depth=num_threads,
        seed=42,
    )

    self.path, self.training, self.stride = path, training, stride
    self.iter = 0
    self.rotate_augment = rotate_augment
    self.augment_brightness = augment_brightness
    self.augment_contrast = augment_contrast
    self.augment_hue = augment_hue
    self.augment_saturation = augment_saturation

    # The shard index comes from the current CUDA device, not `device_id`.
    self.reader = ops.COCOReader(
        annotations_file=annotations,
        file_root=path,
        num_shards=world,
        shard_id=torch.cuda.current_device(),
        ltrb=True,
        ratio=True,
        shuffle_after_epoch=True,
        save_img_ids=True,
    )

    mixed_rgb = {"device": "mixed", "output_type": types.RGB}
    self.decode_train = ops.ImageDecoderSlice(**mixed_rgb)
    self.decode_infer = ops.ImageDecoder(**mixed_rgb)

    self.bbox_crop = ops.RandomBBoxCrop(
        device='cpu',
        ltrb=True,
        scaling=[0.3, 1.0],
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
    )

    self.bbox_flip = ops.BbFlip(device='cpu', ltrb=True)
    self.img_flip = ops.Flip(device='gpu')
    self.coin_flip = ops.CoinFlip(probability=0.5)
    self.bc = ops.BrightnessContrast(device='gpu')
    self.hsv = ops.Hsv(device='gpu')

    # Random number generation for augmentation.
    self.brightness_dist = ops.NormalDistribution(
        mean=1.0, stddev=augment_brightness)
    self.contrast_dist = ops.NormalDistribution(
        mean=1.0, stddev=augment_contrast)
    self.hue_dist = ops.NormalDistribution(
        mean=0.0, stddev=augment_hue)
    self.saturation_dist = ops.NormalDistribution(
        mean=1.0, stddev=augment_saturation)

    # NOTE(review): this *raises* a warning class, aborting construction,
    # although the message describes a no-op -- confirm whether
    # warnings.warn() was intended.
    if rotate_augment:
        raise RuntimeWarning("--augment-rotate current has no effect when using the DALI data loader.")

    # A list of candidate sizes collapses to its largest entry.
    shortest = max(resize) if isinstance(resize, list) else resize
    self.rand_resize = ops.Uniform(range=[shortest, float(max_size)])

    cubic = types.DALIInterpType.INTERP_CUBIC
    self.resize_train = ops.Resize(
        device='gpu', interp_type=cubic, save_attrs=True)
    self.resize_infer = ops.Resize(
        device='gpu', interp_type=cubic, resize_longer=max_size,
        save_attrs=True)

    # Round max_size up to the next multiple of the stride.
    padded_size = max_size + ((stride - max_size % stride) % stride)

    self.pad = ops.Paste(
        device='gpu', fill_value=0, ratio=1.1,
        min_canvas_size=padded_size, paste_x=0, paste_y=0)
    self.normalize = ops.CropMirrorNormalize(
        device='gpu', mean=mean, std=std,
        crop=(padded_size, padded_size), crop_pos_x=0, crop_pos_y=0)
def __init__(self, batch_size, num_threads, device_id):
    """Create the operators of a synchronous, non-pipelined COCO pipeline.

    ``file_root``, ``annotations_file`` and ``num_gpus`` are not
    parameters; they are resolved from the enclosing scope.

    Args:
        batch_size: Forwarded to the parent pipeline constructor.
        num_threads: Forwarded to the parent pipeline constructor.
        device_id: Forwarded to the parent constructor and used as the
            reader's shard index.
    """
    super(COCOPipeline, self).__init__(
        batch_size, num_threads, device_id,
        exec_async=False, exec_pipelined=False, seed=15)

    self.input = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=device_id,
        num_shards=num_gpus,
        ratio=True,
        ltrb=True,
    )
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)

    # Flip / paste / crop augmentation and their random inputs.
    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, 2))
    self.coin = ops.CoinFlip(probability=0.5)
    self.coin2 = ops.CoinFlip(probability=0.5)
    self.paste = ops.Paste(device="gpu", fill_value=(32, 64, 128))
    self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)
    self.prospective_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0.1, 0.3, 0.5],
        scaling=[0.8, 1.0],
        ltrb=True,
    )
    self.slice = ops.Slice(device="gpu")

    # Resize.
    self.resize = ops.Resize(
        device="gpu",
        interp_type=types.INTERP_LINEAR,
        resize_shorter=800,
        max_size=1200,
    )
    self.shape = ops.Shapes(device="gpu")

    # Normalize and convert HWC to CHW.
    self.cmnp = ops.CropMirrorNormalize(
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        image_type=types.RGB,
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
    )

    # Padding: axes=(0, 1) would address HWC, axes=(1, 2) addresses CHW.
    self.padding = ops.Pad(
        device="gpu", fill_value=0, axes=(1, 2), shape=(800, 1200))
def __init__(self, batch_size, file_root, annotations_file, default_boxes, seed, device_id=0, num_threads=4):
    """Create the reader, augmentation and box-encoding operators.

    Args:
        batch_size: Forwarded to the parent pipeline constructor.
        file_root: Image root directory for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
        default_boxes: Object whose ``as_ltrb_list()`` result is used as
            the box encoder's anchors.
        seed: Forwarded to the parent pipeline constructor.
        device_id: Forwarded to the parent pipeline constructor.
        num_threads: Forwarded to the parent pipeline constructor.
    """
    super(COCOPipeline, self).__init__(
        batch_size=batch_size,
        device_id=device_id,
        num_threads=num_threads,
        seed=seed,
    )

    self.input = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
    )
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    # Augmentation operators.
    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.8, 1.0],
        ltrb=True,
    )
    self.slice = ops.Slice(device="gpu")
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.485 * 255., 0.456 * 255., 0.406 * 255.],
        std=[0.229 * 255., 0.224 * 255., 0.225 * 255.],
    )

    # Random variables.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    # Flip of image and boxes, gated by a fair coin.
    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    anchors = default_boxes.as_ltrb_list()
    self.box_encoder = ops.BoxEncoder(
        device="cpu", criteria=0.5, anchors=anchors)
def __init__(self, file_root, annotations_file, batch_size, num_threads, device_id=0, num_gpus=1, mean=(123.675, 116.28, 103.53), stddev=(1., 1., 1.), random_shuffle=True):
    """Create the reader, resize, normalise and padding operators.

    Args:
        file_root: Image root directory for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
        batch_size: Forwarded to the parent pipeline constructor.
        num_threads: Forwarded to the parent pipeline constructor.
        device_id: Forwarded to the parent constructor and used as the
            reader's shard index.
        num_gpus: Shard count given to the COCO reader.
        mean: Per-channel mean for the normalisation operator.
        stddev: Per-channel standard deviation for the normalisation
            operator.
        random_shuffle: Whether the COCO reader shuffles its samples.
    """
    super(COCOPipeline, self).__init__(
        batch_size, num_threads, device_id, seed=15)

    # Fix: honour the caller's `random_shuffle` instead of always passing
    # True, which silently ignored the parameter.
    self.input = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=device_id,
        num_shards=num_gpus,
        ratio=True,
        skip_empty=True,
        prefetch_queue_depth=32,
        random_shuffle=random_shuffle,
    )

    self.decode = ops.ImageDecoder(device='mixed', output_type=types.BGR)
    self.resize = ops.Resize(
        device='gpu', max_size=1216, resize_shorter=800)

    # `flip` is the random source; `bbox_flip` applies it to the boxes.
    self.flip = ops.CoinFlip(device='cpu')
    self.bbox_flip = ops.BbFlip(device='gpu')

    self.CMN = ops.CropMirrorNormalize(
        device='gpu', mean=mean, std=stddev, output_layout='HWC')

    # Fixed-size padding: images to 1216x1216 on the first two axes, boxes
    # and labels to 100 entries on the first axis (labels filled with -1).
    self.image_pad = ops.Pad(
        device='gpu', fill_value=0, axes=(0, 1), shape=(1216, 1216))
    self.bbox_pad = ops.Pad(
        device='gpu', fill_value=0, axes=(0,), shape=(100,))
    self.label_pad = ops.Pad(
        device='gpu', fill_value=-1, axes=(0,), shape=(100,))

    self.get_shape = ops.Shapes(device='gpu')
    self.float_cast = ops.Cast(device='gpu', dtype=types.FLOAT)
def __new__(cls, horizontal=None, vertical=None, ltrb=True, **kwargs):
    """Build a ``BbFlip`` operator on the device reported by ``context``.

    Parameters
    ----------
    horizontal : int, optional
        Forwarded as the operator's ``horizontal`` argument.
    vertical : int, optional
        Forwarded as the operator's ``vertical`` argument.
    ltrb : bool, optional, default=True
        Forwarded as the operator's ``ltrb`` argument (``ltrb`` versus
        ``xywh`` box format).
    **kwargs
        Any further keyword arguments, passed through unchanged.

    Returns
    -------
    nvidia.dali.ops.BbFlip
        The operator; note that this is not an instance of ``cls``.
    """
    target_device = context.get_device_type()
    return ops.BbFlip(
        horizontal=horizontal,
        vertical=vertical,
        ltrb=ltrb,
        device=target_device,
        **kwargs
    )
def __init__(self, args, device_id, file_root, annotations_file):
    """Create CPU and GPU variants of every detection operator.

    Args:
        args: Object providing ``batch_size``, ``num_workers``,
            ``prefetch``, ``seed`` and ``num_gpus``.
        device_id: Forwarded to the parent constructor and used as the
            reader's shard index.
        file_root: Image root directory for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
    """
    super(DetectionPipeline, self).__init__(
        args.batch_size, args.num_workers, device_id,
        args.prefetch, args.seed)

    # Reading the COCO dataset.
    self.input = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=device_id,
        num_shards=args.num_gpus,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
    )

    # Decoders: host and mixed backends, each with a slice variant.
    self.decode_cpu = ops.HostDecoder(device="cpu", output_type=types.RGB)
    self.decode_crop = ops.HostDecoderSlice(
        device="cpu", output_type=types.RGB)
    self.decode_gpu = ops.nvJPEGDecoder(
        device="mixed", output_type=types.RGB)
    self.decode_gpu_crop = ops.nvJPEGDecoderSlice(
        device="mixed", output_type=types.RGB)

    # Random crops, both seeded from args.seed.
    self.ssd_crop = ops.SSDRandomCrop(
        device="cpu", num_attempts=1, seed=args.seed)
    self.random_bbox_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        seed=args.seed,
    )

    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    triangular = types.DALIInterpType.INTERP_TRIANGULAR
    self.resize_cpu = ops.Resize(
        device="cpu", resize_x=300, resize_y=300, min_filter=triangular)
    self.resize_gpu = ops.Resize(
        device="gpu", resize_x=300, resize_y=300, min_filter=triangular)

    # Normalisation settings shared by both backends.
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    crop_size = (300, 300)
    self.normalize_cpu = ops.CropMirrorNormalize(
        device="cpu", crop=crop_size, mean=mean, std=std,
        mirror=0, output_dtype=types.FLOAT)
    self.normalize_gpu = ops.CropMirrorNormalize(
        device="gpu", crop=crop_size, mean=mean, std=std,
        mirror=0, output_dtype=types.FLOAT)

    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")

    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    # Box encoders: plain and offset-producing, on both backends.
    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(
        device="cpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_gpu = ops.BoxEncoder(
        device="gpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_cpu_offsets = ops.BoxEncoder(
        device="cpu", criteria=0.5, offset=True, scale=2,
        stds=[0.1, 0.1, 0.2, 0.2], anchors=default_boxes)
    self.box_encoder_gpu_offsets = ops.BoxEncoder(
        device="gpu", criteria=0.5, offset=True, scale=2,
        stds=[0.1, 0.1, 0.2, 0.2], anchors=default_boxes)

    # Random variables.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, default_boxes, args, seed):
    """Create the reader, augmentation and box-encoding operators.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` result is used as
            the box encoder's anchors.
        args: Object providing ``batch_size``, ``local_rank``,
            ``num_workers``, ``train_coco_root``, ``train_annotate`` and
            ``fp16``.
        seed: Forwarded to the parent pipeline constructor.
    """
    super(COCOPipeline, self).__init__(
        batch_size=args.batch_size,
        device_id=args.local_rank,
        num_threads=args.num_workers,
        seed=seed,
    )

    # Shard by distributed rank when a process group exists, otherwise run
    # as a single shard. Querying the state explicitly avoids relying on
    # the exception type get_rank() raises when no group is initialised.
    distributed = torch.distributed
    if distributed.is_available() and distributed.is_initialized():
        shard_id = distributed.get_rank()
        num_shards = distributed.get_world_size()
    else:
        shard_id = 0
        num_shards = 1

    self.input = ops.COCOReader(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
    )
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques.
    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=1,
    )
    self.slice = ops.Slice(device="cpu")

    # Float output avoids clipping and quantizing the intermediate result.
    self.hsv = ops.Hsv(device="gpu", dtype=types.FLOAT)
    # The input is float but still in the 0..255 range, hence centre 128.
    self.bc = ops.BrightnessContrast(
        device="gpu", contrast_center=128, dtype=types.UINT8)

    self.resize = ops.Resize(
        device="cpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )

    # Output precision follows the fp16 switch.
    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=0,
        dtype=dtype,
        output_layout=types.NCHW,
        pad_output=False,
    )

    # Random variables.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="cpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    self.box_encoder = ops.BoxEncoder(
        device="cpu",
        criteria=0.5,
        anchors=default_boxes.as_ltrb_list(),
    )
def __init__(self, args, device_id, file_root, annotations_file):
    """Create CPU and GPU variants of every detection operator.

    Args:
        args: Object providing ``batch_size``, ``num_workers``,
            ``prefetch``, ``seed`` and ``num_gpus``.
        device_id: Forwarded to the parent constructor and used as the
            reader's shard index.
        file_root: Image root directory for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
    """
    super(DetectionPipeline, self).__init__(
        batch_size=args.batch_size,
        num_threads=args.num_workers,
        device_id=device_id,
        prefetch_queue_depth=args.prefetch,
        seed=args.seed,
    )

    # Reading the COCO dataset.
    self.input = ops.readers.COCO(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=device_id,
        num_shards=args.num_gpus,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
    )

    # Decoders; the mixed-backend ones set hw_decoder_load to 0.
    self.decode_cpu = ops.decoders.Image(
        device="cpu", output_type=types.RGB)
    self.decode_crop = ops.decoders.ImageSlice(
        device="cpu", output_type=types.RGB)
    self.decode_gpu = ops.decoders.Image(
        device="mixed", output_type=types.RGB, hw_decoder_load=0)
    self.decode_gpu_crop = ops.decoders.ImageSlice(
        device="mixed", output_type=types.RGB, hw_decoder_load=0)

    # Random crops, both seeded from args.seed.
    self.ssd_crop = ops.SSDRandomCrop(
        device="cpu", num_attempts=1, seed=args.seed)
    self.random_bbox_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        seed=args.seed,
    )

    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    triangular = types.DALIInterpType.INTERP_TRIANGULAR
    self.resize_cpu = ops.Resize(
        device="cpu", resize_x=300, resize_y=300, min_filter=triangular)
    self.resize_gpu = ops.Resize(
        device="gpu", resize_x=300, resize_y=300, min_filter=triangular)

    # Normalisation settings shared by both backends.
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    crop_size = (300, 300)
    self.normalize_cpu = ops.CropMirrorNormalize(
        device="cpu", crop=crop_size, mean=mean, std=std,
        mirror=0, dtype=types.FLOAT)
    self.normalize_gpu = ops.CropMirrorNormalize(
        device="gpu", crop=crop_size, mean=mean, std=std,
        mirror=0, dtype=types.FLOAT)

    # Colour operators.
    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")
    self.hsv_cpu = ops.Hsv(device="cpu", dtype=types.FLOAT)
    self.hsv_gpu = ops.Hsv(device="gpu", dtype=types.FLOAT)
    self.bc_cpu = ops.BrightnessContrast(
        device="cpu", dtype=types.UINT8, contrast_center=128)
    self.bc_gpu = ops.BrightnessContrast(
        device="gpu", dtype=types.UINT8, contrast_center=128)

    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    # Box encoders: plain and offset-producing, on both backends.
    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(
        device="cpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_gpu = ops.BoxEncoder(
        device="gpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_cpu_offsets = ops.BoxEncoder(
        device="cpu", criteria=0.5, offset=True, scale=2,
        stds=[0.1, 0.1, 0.2, 0.2], anchors=default_boxes)
    self.box_encoder_gpu_offsets = ops.BoxEncoder(
        device="gpu", criteria=0.5, offset=True, scale=2,
        stds=[0.1, 0.1, 0.2, 0.2], anchors=default_boxes)

    # Random variables. The attribute name `brighness_rng` (sic) is kept
    # because code outside this method may refer to it.
    self.saturation_rng = ops.random.Uniform(range=[0.8, 1.2])
    self.contrast_rng = ops.random.Uniform(range=[0.5, 1.5])
    self.brighness_rng = ops.random.Uniform(range=[0.875, 1.125])
    self.hue_rng = ops.random.Uniform(range=[-45, 45])
def __init__(self, batch_size, device_id, file_root, annotations_file, num_gpus, output_fp16=False, output_nhwc=False, pad_output=False, num_threads=1, seed=15):
    """Create the reader and a broad set of augmentation operators.

    Args:
        batch_size: Forwarded to the parent pipeline constructor.
        device_id: Forwarded to the parent pipeline constructor.
        file_root: Image root directory for the COCO reader.
        annotations_file: Annotation file for the COCO reader.
        num_gpus: Shard count given to the COCO reader.
        output_fp16: Selects ``types.FLOAT16`` instead of ``types.FLOAT``
            as the normalisation output type.
        output_nhwc: Selects ``types.NHWC`` instead of ``types.NCHW`` as
            the normalisation output layout.
        pad_output: Forwarded to the normalisation operator.
        num_threads: Forwarded to the parent pipeline constructor.
        seed: Forwarded to the parent pipeline constructor.
    """
    super(COCOPipeline, self).__init__(
        batch_size=batch_size,
        device_id=device_id,
        num_threads=num_threads,
        seed=seed,
    )

    # Shard by distributed rank when a process group is initialised.
    shard_id = (torch.distributed.get_rank()
                if torch.distributed.is_initialized() else 0)

    self.input = ops.COCOReader(
        file_root=file_root,
        annotations_file=annotations_file,
        shard_id=shard_id,
        num_shards=num_gpus,
        ratio=True,
        ltrb=True,
        random_shuffle=True,
        skip_empty=True,
    )
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques.
    self.rotate = ops.Rotate(
        device="gpu", angle=30,
        interp_type=types.INTERP_LINEAR, fill_value=0)
    self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

    # Flipping happens with the probability configured on the coin flips.
    self.flip = ops.Flip(device='gpu')
    self.coin_flip_v = ops.CoinFlip(probability=0.1)
    self.coin_flip_h = ops.CoinFlip(probability=0.1)

    # Bounding-box flipping.
    self.bbflip = ops.BbFlip(device='gpu', ltrb=True)

    # Paste.
    self.paste = ops.Paste(device='gpu', fill_value=0)
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, 2))
    self.bbpaste = ops.BBoxPaste(device='cpu', ltrb=True)

    # Prospective crop.
    self.prospective_crop = ops.RandomBBoxCrop(
        device='cpu',
        aspect_ratio=[0.5, 2.0],
        thresholds=[0.1, 0.3, 0.5],
        scaling=[0.8, 1.0],
        ltrb=True,
    )

    # Slice (after the prospective crop).
    self.slice = ops.Slice(device='gpu')

    # Colour / geometric effects. Brightness-contrast and HSV operators are
    # currently disabled:
    # self.contrast = ops.BrightnessContrast(device="gpu", brightness=0.5, contrast=1.5)
    # self.hsv = ops.Hsv(device="gpu", hue=45., saturation=0.2)
    self.water = ops.Water(device='gpu')
    self.sphere = ops.Sphere(device='gpu')
    self.warpaffine = ops.WarpAffine(
        device="gpu",
        matrix=[1.0, 0.8, 0.0, 0.0, 1.2, 0.0],
        interp_type=types.INTERP_LINEAR,
    )

    # Output type and layout follow the constructor switches.
    if output_fp16:
        output_dtype = types.FLOAT16
    else:
        output_dtype = types.FLOAT
    if output_nhwc:
        output_layout = types.NHWC
    else:
        output_layout = types.NCHW

    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.0, 0.0, 0.0],
        std=[255.0, 255.0, 255.0],
        mirror=0,
        output_dtype=output_dtype,
        output_layout=output_layout,
        pad_output=pad_output,
    )

    # Random variables.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, batch_size, device_id, file_root, meta_files_path, annotations_file, num_gpus, anchors_ltrb_list, output_fp16=False, output_nhwc=False, pad_output=False, num_threads=1, seed=15, dali_cache=-1, dali_async=True, use_nvjpeg=False):
    """Create the reader, ROI decoder, augmentation and encoding operators.

    Attribute prefixes follow the operator backend: ``c_`` for CPU,
    ``m_`` for the decoder and ``g_`` for GPU operators.

    Args:
        batch_size: Forwarded to the parent pipeline constructor.
        device_id: Forwarded to the parent pipeline constructor.
        file_root: Image root directory for the COCO reader.
        meta_files_path: When not ``None``, the reader is configured from
            this path instead of ``annotations_file``.
        annotations_file: Annotation file, used only when
            ``meta_files_path`` is ``None``.
        num_gpus: Shard count given to the COCO reader.
        anchors_ltrb_list: Anchors for the box encoder.
        output_fp16: Selects ``types.FLOAT16`` instead of ``types.FLOAT``
            as the normalisation output type.
        output_nhwc: Selects ``types.NHWC`` instead of ``types.NCHW`` as
            the normalisation output layout.
        pad_output: Forwarded to the normalisation operator.
        num_threads: Forwarded to the parent pipeline constructor.
        seed: Forwarded to the parent pipeline constructor.
        dali_cache: A positive value enables decoder caching (together
            with ``use_nvjpeg``) and switches the reader's shuffling mode;
            it is multiplied by 1024 to give the decoder ``cache_size``.
        dali_async: Used for both ``exec_pipelined`` and ``exec_async``.
        use_nvjpeg: Selects the ``'mixed'`` decoder backend instead of
            ``'cpu'``; stored on ``self.use_nvjpeg``.
    """
    super(COCOPipeline, self).__init__(
        batch_size=batch_size,
        device_id=device_id,
        num_threads=num_threads,
        seed=seed,
        exec_pipelined=dali_async,
        exec_async=dali_async,
    )

    self.use_nvjpeg = use_nvjpeg

    # Shard by distributed rank when a process group exists. The state is
    # queried explicitly because the exception get_rank() raises without a
    # process group has changed type between releases.
    distributed = torch.distributed
    if distributed.is_available() and distributed.is_initialized():
        shard_id = distributed.get_rank()
    else:
        shard_id = 0

    # Identity test: `== None` would invoke a custom __eq__ if present.
    if meta_files_path is None:
        self.c_input = ops.COCOReader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=shard_id,
            num_shards=num_gpus,
            ratio=True,
            ltrb=True,
            skip_empty=True,
            random_shuffle=(dali_cache > 0),
            stick_to_shard=(dali_cache > 0),
            lazy_init=True,
            shuffle_after_epoch=(dali_cache <= 0),
        )
    else:
        self.c_input = ops.COCOReader(
            file_root=file_root,
            meta_files_path=meta_files_path,
            shard_id=shard_id,
            num_shards=num_gpus,
            random_shuffle=(dali_cache > 0),
            stick_to_shard=(dali_cache > 0),
            lazy_init=True,
            shuffle_after_epoch=(dali_cache <= 0),
        )

    self.c_crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=1,
    )

    decoder_device = 'mixed' if use_nvjpeg else 'cpu'
    # Fused decode and slice: "region-of-interest" (roi) decoding.
    self.m_decode = ops.ImageDecoderSlice(
        device=decoder_device, output_type=types.RGB)
    self.g_slice = None

    # Special case for decode caching: the caching decoder cannot be fused
    # with slicing (the decoded image has to be sliced differently every
    # epoch), so decode and slice are split into two operators.
    if dali_cache > 0 and use_nvjpeg:
        self.m_decode = ops.ImageDecoder(
            device='mixed',
            output_type=types.RGB,
            cache_size=dali_cache * 1024,
            cache_type="threshold",
            cache_threshold=10000,
        )
        self.g_slice = ops.Slice(device="gpu")

    # Augmentation techniques (in addition to the random crop).
    self.g_twist = ops.ColorTwist(device="gpu")
    self.g_resize = ops.Resize(
        device="gpu",
        resize_x=300,
        resize_y=300,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )

    output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
    output_layout = types.NHWC if output_nhwc else types.NCHW

    # Per-channel statistics scaled from the 0..1 to the 0..255 range.
    mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
    std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
    self.g_normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=mean_val,
        std=std_val,
        output_dtype=output_dtype,
        output_layout=output_layout,
        pad_output=pad_output,
    )

    # Random variables.
    self.c_rng1 = ops.Uniform(range=[0.5, 1.5])
    self.c_rng2 = ops.Uniform(range=[0.875, 1.125])
    self.c_rng3 = ops.Uniform(range=[-0.5, 0.5])

    flip_probability = 0.5
    self.c_flip_coin = ops.CoinFlip(probability=flip_probability)
    self.c_bbflip = ops.BbFlip(device="cpu", ltrb=True)

    self.g_box_encoder = ops.BoxEncoder(
        device="gpu",
        criteria=0.5,
        anchors=anchors_ltrb_list,
        offset=True,
        stds=[0.1, 0.1, 0.2, 0.2],
        scale=300,
    )
    self.g_cast = ops.Cast(device="gpu", dtype=types.FLOAT)
def __init__(self, default_boxes, root, annFile, batch_size, mean, std, local_rank, num_workers, seed):
    """Create the reader, expand/crop/colour augmentation and box encoder.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` result is used as
            the box encoder's anchors.
        root: Image root directory for the COCO reader.
        annFile: Annotation file for the COCO reader.
        batch_size: Forwarded to the parent pipeline constructor.
        mean: Per-channel mean; used for normalisation and, as a tuple,
            as the paste fill value.
        std: Per-channel standard deviation for normalisation.
        local_rank: Forwarded to the parent constructor as ``device_id``.
        num_workers: Forwarded to the parent constructor as
            ``num_threads``.
        seed: Forwarded to the parent pipeline constructor.
    """
    super(COCOPipeline, self).__init__(
        batch_size=batch_size,
        device_id=local_rank,
        num_threads=num_workers,
        seed=seed,
    )

    # Sharding is fixed to a single shard; the distributed lookup below is
    # disabled.
    # try:
    #     shard_id = torch.distributed.get_rank()
    #     num_shards = torch.distributed.get_world_size()
    # except RuntimeError:
    shard_id, num_shards = 0, 1

    self.input = ops.COCOReader(
        file_root=root,
        annotations_file=annFile,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
    )
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    # Augmentation: expand by a factor of 1..2, filling with the mean.
    self.paste_ratio = ops.Uniform(range=[1, 2])
    self.paste_pos = ops.Uniform(range=[0, 1])
    self.paste = ops.Paste(device="gpu", fill_value=tuple(mean))
    self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)

    # Random crop.
    self.crop = ops.RandomBBoxCrop(
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        ltrb=True,
        allow_no_crop=True,
        num_attempts=50,
    )
    self.slice = ops.Slice(device="gpu")
    self.twist = ops.ColorTwist(device="gpu")

    side = 320
    self.resize = ops.Resize(
        device="gpu",
        resize_x=side,
        resize_y=side,
        min_filter=types.DALIInterpType.INTERP_TRIANGULAR,
    )
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(side, side),
        mean=mean,
        std=std,
        mirror=0,
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
        pad_output=False,
    )

    # Random variables.
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    anchors = default_boxes.as_ltrb_list()
    self.box_encoder = ops.BoxEncoder(
        device="cpu", criteria=0.5, anchors=anchors)
def __init__(self, train=False, batch_size=16, workers=4, size=512):
    """Create the operators of the train/validation COCO pipeline.

    ``DATA_DIR``, ``env_rank`` and ``env_world_size`` are resolved from
    outside this method.

    Args:
        train: Selects the "train" split and the slicing decoder; stored
            on ``self.train``.
        batch_size: Forwarded to the parent pipeline constructor.
        workers: Forwarded positionally to the parent constructor after
            ``batch_size``.
        size: ``resize_longer`` target and side of the square pad shape.
    """
    # TODO: support size as tuple
    local_rank = env_rank()
    world_size = env_world_size()
    super().__init__(batch_size, workers, local_rank, seed=42)

    split_str = "train" if train else "val"
    self.input = ops.COCOReader(
        file_root=f"{DATA_DIR}/{split_str}2017",
        annotations_file=f"{DATA_DIR}/annotations/instances_{split_str}2017.json",
        shard_id=local_rank,
        num_shards=world_size,
        ratio=True,  # want bbox in [0, 1]
        ltrb=True,
        # random_shuffle=train,
        save_img_ids=True,  # ids are needed for evaluation
        skip_empty=True,  # skips images without objects; may be revisited
    )

    self.bbox_crop = ops.RandomBBoxCrop(
        device="cpu",  # gpu is not supported (and not needed)
        bbox_layout="xyXY",  # same as 'ltrb'
        scaling=[0.3, 1.0],
        # 0.0 is added to the thresholds instead of using `allow_no_crop`
        thresholds=[0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
    )

    # Training uses the slicing decoder, evaluation the plain one.
    decoder_cls = ops.ImageDecoderSlice if train else ops.ImageDecoder
    self.decode = decoder_cls(device="mixed", output_type=types.RGB)

    # NOTE(review): `self.resize` is assigned twice in a row, so the
    # `save_attrs=True` variant is immediately replaced -- confirm which
    # one is intended. Both constructions are kept to preserve behaviour.
    cubic = types.INTERP_CUBIC
    self.resize = ops.Resize(
        device="gpu", interp_type=cubic, resize_longer=size,
        save_attrs=True)
    self.resize = ops.Resize(
        device="gpu", interp_type=cubic, resize_longer=size)

    self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True)
    self.img_flip = ops.Flip(device="gpu")

    # Color augmentations.
    self.bc = ops.BrightnessContrast(device="gpu")
    self.hsv = ops.Hsv(device="gpu")

    # Pad to a square canvas so the output matches the stride.
    self.pad = ops.Pad(
        device="gpu", fill_value=0, axes=(1, 2), shape=(size, size))

    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        # ImageNet mean and std
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        # mean=[0, 0, 0],
        # std=[1, 1, 1],
        output_dtype=types.FLOAT,
        output_layout=types.NCHW,
    )

    # TODO: add Jitter aug

    # Random number generation for augmentation.
    self.coin_flip = ops.CoinFlip(probability=0.5)
    self.rng1 = ops.Uniform(range=[0, 1])
    self.rng2 = ops.Uniform(range=[0.85, 1.15])
    self.rng3 = ops.Uniform(range=[-15, 15])

    self.train = train