def __init__(self, device, batch_size, pos_size_iter,
             num_threads=1, device_id=0, is_fused_decoder=False,
             axes=None, axis_names=None,
             normalized_anchor=True, normalized_shape=True):
    super(SlicePipeline, self).__init__(batch_size, num_threads, device_id, seed=1234)
    self.is_fused_decoder = is_fused_decoder
    self.pos_size_iter = pos_size_iter
    self.device = device
    self.input = ops.CaffeReader(path=caffe_db_folder, random_shuffle=False)
    self.input_crop_pos = ops.ExternalSource()
    self.input_crop_size = ops.ExternalSource()
    if self.is_fused_decoder:
        if axis_names:
            self.decode = ops.ImageDecoderSlice(device="cpu",
                                                output_type=types.RGB,
                                                normalized_anchor=normalized_anchor,
                                                normalized_shape=normalized_shape,
                                                axis_names=axis_names)
        elif axes:
            self.decode = ops.ImageDecoderSlice(device="cpu",
                                                output_type=types.RGB,
                                                normalized_anchor=normalized_anchor,
                                                normalized_shape=normalized_shape,
                                                axes=axes)
        else:
            self.decode = ops.ImageDecoderSlice(device="cpu",
                                                output_type=types.RGB,
                                                normalized_anchor=normalized_anchor,
                                                normalized_shape=normalized_shape)
    else:
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        if axis_names:
            self.slice = ops.Slice(device=self.device,
                                   normalized_anchor=normalized_anchor,
                                   normalized_shape=normalized_shape,
                                   axis_names=axis_names)
        elif axes:
            self.slice = ops.Slice(device=self.device,
                                   normalized_anchor=normalized_anchor,
                                   normalized_shape=normalized_shape,
                                   axes=axes)
        else:
            self.slice = ops.Slice(device=self.device,
                                   normalized_anchor=normalized_anchor,
                                   normalized_shape=normalized_shape)
def __init__(self, device, batch_size, layout, iterator, pos_size_iter,
             num_threads=1, device_id=0, num_gpus=1,
             axes=None, axis_names=None,
             normalized_anchor=True, normalized_shape=True):
    super(SliceSynthDataPipeline, self).__init__(batch_size, num_threads, device_id, seed=1234)
    self.device = device
    self.layout = layout
    self.iterator = iterator
    self.pos_size_iter = pos_size_iter
    self.inputs = ops.ExternalSource()
    self.input_crop_pos = ops.ExternalSource()
    self.input_crop_size = ops.ExternalSource()
    if axis_names:
        self.slice = ops.Slice(device=self.device,
                               normalized_anchor=normalized_anchor,
                               normalized_shape=normalized_shape,
                               axis_names=axis_names)
    elif axes:
        self.slice = ops.Slice(device=self.device,
                               normalized_anchor=normalized_anchor,
                               normalized_shape=normalized_shape,
                               axes=axes)
    else:
        self.slice = ops.Slice(device=self.device,
                               normalized_anchor=normalized_anchor,
                               normalized_shape=normalized_shape)
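# Hedged sketch (not from the original snippets): how a pipeline like the one above
# typically wires the two ExternalSource anchor/shape nodes into Slice in the legacy
# ops API. The iterator protocol (`next()` returning data / (pos, size)) is an assumption.
def define_graph(self):
    self.data = self.inputs()
    self.crop_pos = self.input_crop_pos()
    self.crop_size = self.input_crop_size()
    data = self.data.gpu() if self.device == "gpu" else self.data
    # Slice consumes the data plus anchor and shape tensor inputs.
    return self.slice(data, self.crop_pos, self.crop_size)

def iter_setup(self):
    data = self.iterator.next()
    pos, size = self.pos_size_iter.next()
    self.feed_input(self.data, data, layout=self.layout)
    self.feed_input(self.crop_pos, pos)
    self.feed_input(self.crop_size, size)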
def __init__(self, batch_size, num_threads, device_id, prefetch, seed):
    super(CommonPipeline, self).__init__(batch_size, num_threads, device_id,
                                         prefetch_queue_depth=prefetch)
    self.decode_cpu = ops.HostDecoder(device="cpu", output_type=types.RGB)
    self.decode_crop = ops.HostDecoderSlice(device="cpu", output_type=types.RGB)
    self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1, seed=seed)
    self.crop2 = ops.RandomBBoxCrop(device="cpu",
                                    aspect_ratio=[0.5, 2.0],
                                    thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                    scaling=[0.3, 1.0],
                                    ltrb=True,
                                    seed=seed)
    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")
    self.flip_cpu = ops.Flip(device="cpu")
    self.bb_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bb_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)
def __init__(self, device, batch_size, layout, iterator, pos_size_iter,
             num_threads=1, device_id=0, num_gpus=1,
             axes=None, axis_names=None,
             normalized_anchor=True, normalized_shape=True,
             extra_outputs=False, out_of_bounds_policy=None, fill_values=None):
    super(SliceSynthDataPipeline, self).__init__(batch_size, num_threads, device_id, seed=1234)
    self.device = device
    self.layout = layout
    self.iterator = iterator
    self.pos_size_iter = pos_size_iter
    self.inputs = ops.ExternalSource()
    self.input_crop_pos = ops.ExternalSource()
    self.input_crop_size = ops.ExternalSource()
    self.extra_outputs = extra_outputs
    self.slice = ops.Slice(device=self.device,
                           normalized_anchor=normalized_anchor,
                           normalized_shape=normalized_shape,
                           axes=axes,
                           axis_names=axis_names,
                           out_of_bounds_policy=out_of_bounds_policy,
                           fill_values=fill_values)
def __init__(self, device, batch_size, pos_size_iter,
             num_threads=1, device_id=0, is_fused_decoder=False):
    super(SlicePipeline, self).__init__(batch_size, num_threads, device_id, seed=1234)
    self.is_fused_decoder = is_fused_decoder
    self.pos_size_iter = pos_size_iter
    self.device = device
    self.input = ops.CaffeReader(path=caffe_db_folder, random_shuffle=False)
    self.input_crop_pos = ops.ExternalSource()
    self.input_crop_size = ops.ExternalSource()
    if self.is_fused_decoder:
        self.decode = ops.ImageDecoderSlice(device="cpu", output_type=types.RGB)
    else:
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.slice = ops.Slice(device=device, image_type=types.RGB)
def __new__(cls, axes=(1, 0), normalized_anchor=True, normalized_shape=True, **kwargs):
    """Create a ``Slice`` operator.

    Parameters
    ----------
    axes : Sequence[int], optional, default=(1, 0)
        The axes to slice along.
    normalized_anchor : bool, optional, default=True
        Whether the anchor (start of the interval) is normalized.
    normalized_shape : bool, optional, default=True
        Whether the shape (size of the interval) is normalized.

    Returns
    -------
    nvidia.dali.ops.Slice
        The operator.

    """
    return ops.Slice(axes=axes,
                     normalized_anchor=normalized_anchor,
                     normalized_shape=normalized_shape,
                     device=context.get_device_type(),
                     **kwargs)
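# Hedged usage sketch (not from the original code): calling a wrapper class whose
# __new__ is shown above. The wrapper class name `Slice` and the `context` module
# are assumptions taken from the snippet itself.
slice_op = Slice(axes=(1, 0), normalized_anchor=True, normalized_shape=True)
# `slice_op` is a plain nvidia.dali.ops.Slice instance; inside a pipeline graph it
# would be invoked with data, anchor and shape inputs:
#     out = slice_op(images, crop_anchor, crop_shape)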
def __init__(self, device_id, n_devices, file_root, file_list, batch_size,
             sample_rate=16000, window_size=.02, window_stride=.01,
             nfeatures=64, nfft=512, frame_splicing_factor=3,
             silence_threshold=-80, dither=.00001, preemph_coeff=.97,
             lowfreq=0.0, highfreq=0.0, num_threads=1):
    super().__init__(batch_size, num_threads, device_id, seed=42)
    self.dither = dither
    self.frame_splicing_factor = frame_splicing_factor
    self.read = ops.readers.File(file_root=file_root, file_list=file_list, device="cpu",
                                 shard_id=device_id, num_shards=n_devices)
    self.decode = ops.AudioDecoder(device="cpu", dtype=types.FLOAT, downmix=True)
    self.normal_distribution = ops.random.Normal(device="cpu")
    self.preemph = ops.PreemphasisFilter(preemph_coeff=preemph_coeff)
    self.spectrogram = ops.Spectrogram(device="cpu", nfft=nfft,
                                       window_length=window_size * sample_rate,
                                       window_step=window_stride * sample_rate)
    self.mel_fbank = ops.MelFilterBank(device="cpu", sample_rate=sample_rate,
                                       nfilter=nfeatures, normalize=True,
                                       freq_low=lowfreq, freq_high=highfreq)
    self.log_features = ops.ToDecibels(device="cpu", multiplier=np.log(10),
                                       reference=1.0, cutoff_db=-80)
    self.get_shape = ops.Shapes(device="cpu")
    self.normalize = ops.Normalize(axes=[0], device="cpu")
    self.splicing_transpose = ops.Transpose(device="cpu", perm=[1, 0])
    self.splicing_reshape = ops.Reshape(device="cpu", rel_shape=[-1, frame_splicing_factor])
    self.splicing_pad = ops.Pad(axes=[0], fill_value=0, align=frame_splicing_factor,
                                shape=[1], device="cpu")
    self.get_nonsilent_region = ops.NonsilentRegion(device="cpu", cutoff_db=silence_threshold)
    self.trim_silence = ops.Slice(device="cpu", axes=[0])
    self.to_float = ops.Cast(dtype=types.FLOAT)
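# Hedged sketch (not from the original code): how the NonsilentRegion / Slice / Cast
# trio above is commonly combined to trim leading and trailing silence from the
# decoded waveform; the helper name is an assumption made for illustration.
def _remove_silence(self, audio):
    begin, length = self.get_nonsilent_region(audio)
    # Slice along axis 0; the anchor and length returned by NonsilentRegion are cast
    # to float because the Slice argument inputs are consumed as floating point here.
    return self.trim_silence(audio, self.to_float(begin), self.to_float(length))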
def __init__(self, batch_size, num_threads, device_id):
    super(COCOPipeline, self).__init__(batch_size, num_threads, device_id,
                                       exec_async=False, exec_pipelined=False, seed=15)
    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=device_id,
                                num_shards=num_gpus,
                                ratio=True,
                                ltrb=True)
    self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, 2))
    self.coin = ops.CoinFlip(probability=0.5)
    self.coin2 = ops.CoinFlip(probability=0.5)
    self.paste = ops.Paste(device="gpu", fill_value=(32, 64, 128))
    self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)
    self.prospective_crop = ops.RandomBBoxCrop(device="cpu",
                                               aspect_ratio=[0.5, 2.0],
                                               thresholds=[0.1, 0.3, 0.5],
                                               scaling=[0.8, 1.0],
                                               ltrb=True)
    self.slice = ops.Slice(device="gpu")

    # resize
    self.resize = ops.Resize(device="gpu", interp_type=types.INTERP_LINEAR,
                             resize_shorter=800, max_size=1200)
    self.shape = ops.Shapes(device="gpu")

    # normalize and convert HWC to CHW
    self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                        output_dtype=types.FLOAT,
                                        output_layout=types.NCHW,
                                        image_type=types.RGB,
                                        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                        std=[0.229 * 255, 0.224 * 255, 0.225 * 255])

    # padding: axes=(0, 1) -> HWC, axes=(1, 2) -> CHW
    self.padding = ops.Pad(device="gpu", fill_value=0, axes=(1, 2), shape=(800, 1200))
def __init__(self, batch_size, file_root, annotations_file, default_boxes, seed,
             device_id=0, num_threads=4):
    super(COCOPipeline, self).__init__(batch_size=batch_size, device_id=device_id,
                                       num_threads=num_threads, seed=seed)
    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                ratio=True, ltrb=True, random_shuffle=True)
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    # Augmentation techniques
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.8, 1.0],
                                   ltrb=True)
    self.slice = ops.Slice(device="gpu")
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)
    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.485 * 255., 0.456 * 255., 0.406 * 255.],
        std=[0.229 * 255., 0.224 * 255., 0.225 * 255.])

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    self.box_encoder = ops.BoxEncoder(device="cpu", criteria=0.5,
                                      anchors=default_boxes.as_ltrb_list())
def __init__(self, batch_size, num_threads, device_id, external_data, is_train=True):
    super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
    self.is_train = is_train
    self.input = ops.ExternalSource()
    self.angle_rng = ops.Uniform(range=(-10.0, 10.0))
    self.rotate = ops.Rotate(device="gpu")
    self.flip_rng = ops.CoinFlip(probability=0.5)
    self.flip = ops.Flip(device="gpu")
    self.slice = ops.Slice()
    self.external_data = external_data
    self.iterator = iter(self.external_data)
def __init__(self, default_boxes, args, seed):
    super(COCOPipeline, self).__init__(batch_size=args.batch_size,
                                       device_id=args.local_rank,
                                       num_threads=args.num_workers,
                                       seed=seed)

    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id = 0
        num_shards = 1

    self.input = ops.COCOReader(file_root=args.train_coco_root,
                                annotations_file=args.train_annotate,
                                skip_empty=True,
                                shard_id=shard_id,
                                num_shards=num_shards,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=False,
                                shuffle_after_epoch=True)
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.3, 1.0],
                                   ltrb=True,
                                   allow_no_crop=True,
                                   num_attempts=1)
    self.slice = ops.Slice(device="cpu")
    # use float to avoid clipping and quantizing the intermediate result
    self.hsv = ops.Hsv(device="gpu", dtype=types.FLOAT)
    self.bc = ops.BrightnessContrast(device="gpu",
                                     contrast_center=128,  # input is in float, but in 0..255 range
                                     dtype=types.UINT8)
    self.resize = ops.Resize(device="cpu", resize_x=300, resize_y=300,
                             min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    self.normalize = ops.CropMirrorNormalize(
        device="gpu",
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=0,
        dtype=dtype,
        output_layout=types.NCHW,
        pad_output=False)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="cpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    self.box_encoder = ops.BoxEncoder(device="cpu", criteria=0.5,
                                      anchors=default_boxes.as_ltrb_list())
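# Hedged sketch (not from the original code): the standard way the RandomBBoxCrop
# output feeds the Slice operator in SSD-style pipelines such as the one above;
# the fragment/variable names are assumptions for illustration.
def _crop_fragment(self, images, bboxes, labels):
    # RandomBBoxCrop returns the crop anchor/shape plus the boxes and labels adjusted
    # to the crop window; Slice then cuts the decoded image to that window.
    crop_begin, crop_size, bboxes, labels = self.crop(bboxes, labels)
    images = self.slice(images, crop_begin, crop_size)
    return images, bboxes, labels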
def __init__(self, args, device_id, file_root, annotations_file):
    super(DetectionPipeline, self).__init__(batch_size=args.batch_size,
                                            num_threads=args.num_workers,
                                            device_id=device_id,
                                            prefetch_queue_depth=args.prefetch,
                                            seed=args.seed)

    # Reading COCO dataset
    self.input = ops.readers.COCO(file_root=file_root,
                                  annotations_file=annotations_file,
                                  shard_id=device_id,
                                  num_shards=args.num_gpus,
                                  ratio=True,
                                  ltrb=True,
                                  random_shuffle=True)

    self.decode_cpu = ops.decoders.Image(device="cpu", output_type=types.RGB)
    self.decode_crop = ops.decoders.ImageSlice(device="cpu", output_type=types.RGB)
    self.decode_gpu = ops.decoders.Image(device="mixed", output_type=types.RGB,
                                         hw_decoder_load=0)
    self.decode_gpu_crop = ops.decoders.ImageSlice(device="mixed", output_type=types.RGB,
                                                   hw_decoder_load=0)

    self.ssd_crop = ops.SSDRandomCrop(device="cpu", num_attempts=1, seed=args.seed)
    self.random_bbox_crop = ops.RandomBBoxCrop(device="cpu",
                                               aspect_ratio=[0.5, 2.0],
                                               thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                               scaling=[0.3, 1.0],
                                               bbox_layout="xyXY",
                                               seed=args.seed)

    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    self.resize_cpu = ops.Resize(device="cpu", resize_x=300, resize_y=300,
                                 min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
    self.resize_gpu = ops.Resize(device="gpu", resize_x=300, resize_y=300,
                                 min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    crop_size = (300, 300)
    self.normalize_cpu = ops.CropMirrorNormalize(device="cpu", crop=crop_size,
                                                 mean=mean, std=std, mirror=0,
                                                 dtype=types.FLOAT)
    self.normalize_gpu = ops.CropMirrorNormalize(device="gpu", crop=crop_size,
                                                 mean=mean, std=std, mirror=0,
                                                 dtype=types.FLOAT)

    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")

    self.hsv_cpu = ops.Hsv(device="cpu", dtype=types.FLOAT)
    self.hsv_gpu = ops.Hsv(device="gpu", dtype=types.FLOAT)

    self.bc_cpu = ops.BrightnessContrast(device="cpu", dtype=types.UINT8, contrast_center=128)
    self.bc_gpu = ops.BrightnessContrast(device="gpu", dtype=types.UINT8, contrast_center=128)

    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(device="cpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_gpu = ops.BoxEncoder(device="gpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_cpu_offsets = ops.BoxEncoder(device="cpu", criteria=0.5, offset=True,
                                                  scale=2, stds=[0.1, 0.1, 0.2, 0.2],
                                                  anchors=default_boxes)
    self.box_encoder_gpu_offsets = ops.BoxEncoder(device="gpu", criteria=0.5, offset=True,
                                                  scale=2, stds=[0.1, 0.1, 0.2, 0.2],
                                                  anchors=default_boxes)

    # Random variables
    self.saturation_rng = ops.random.Uniform(range=[0.8, 1.2])
    self.contrast_rng = ops.random.Uniform(range=[0.5, 1.5])
    self.brighness_rng = ops.random.Uniform(range=[0.875, 1.125])
    self.hue_rng = ops.random.Uniform(range=[-45, 45])
def __init__(self,
             *,
             train_pipeline: bool,  # True if train pipeline, False if validation pipeline
             device_id,
             num_threads,
             batch_size,
             file_root: str,
             file_list: str,
             sample_rate,
             discrete_resample_range: bool,
             resample_range: list,
             window_size,
             window_stride,
             nfeatures,
             nfft,
             frame_splicing_factor,
             dither_coeff,
             silence_threshold,
             preemph_coeff,
             pad_align,
             max_duration,
             mask_time_num_regions,
             mask_time_min,
             mask_time_max,
             mask_freq_num_regions,
             mask_freq_min,
             mask_freq_max,
             mask_both_num_regions,
             mask_both_min_time,
             mask_both_max_time,
             mask_both_min_freq,
             mask_both_max_freq,
             preprocessing_device="gpu"):
    super().__init__(batch_size, num_threads, device_id)
    self._dali_init_log(locals())

    if torch.distributed.is_initialized():
        shard_id = torch.distributed.get_rank()
        n_shards = torch.distributed.get_world_size()
    else:
        shard_id = 0
        n_shards = 1

    self.preprocessing_device = preprocessing_device.lower()
    assert self.preprocessing_device == "cpu" or self.preprocessing_device == "gpu", \
        "Incorrect preprocessing device. Please choose either 'cpu' or 'gpu'"
    self.frame_splicing_factor = frame_splicing_factor
    assert frame_splicing_factor == 1, "DALI doesn't support frame splicing operation"

    self.resample_range = resample_range
    self.discrete_resample_range = discrete_resample_range

    self.train = train_pipeline
    self.sample_rate = sample_rate
    self.dither_coeff = dither_coeff
    self.nfeatures = nfeatures
    self.max_duration = max_duration
    self.mask_params = {
        'time_num_regions': mask_time_num_regions,
        'time_min': mask_time_min,
        'time_max': mask_time_max,
        'freq_num_regions': mask_freq_num_regions,
        'freq_min': mask_freq_min,
        'freq_max': mask_freq_max,
        'both_num_regions': mask_both_num_regions,
        'both_min_time': mask_both_min_time,
        'both_max_time': mask_both_max_time,
        'both_min_freq': mask_both_min_freq,
        'both_max_freq': mask_both_max_freq,
    }
    self.do_remove_silence = True if silence_threshold is not None else False

    self.read = ops.FileReader(device="cpu",
                               file_root=file_root,
                               file_list=file_list,
                               shard_id=shard_id,
                               num_shards=n_shards,
                               shuffle_after_epoch=train_pipeline)

    # TODO change ExternalSource to Uniform for new DALI release
    if discrete_resample_range and resample_range is not None:
        self.speed_perturbation_coeffs = ops.ExternalSource(
            device="cpu", cycle=True,
            source=self._discrete_resample_coeffs_generator)
    elif resample_range is not None:
        self.speed_perturbation_coeffs = random.Uniform(device="cpu",
                                                        range=resample_range)
    else:
        self.speed_perturbation_coeffs = None

    self.decode = ops.AudioDecoder(
        device="cpu",
        sample_rate=self.sample_rate if resample_range is None else None,
        dtype=types.FLOAT,
        downmix=True)

    self.normal_distribution = random.Normal(device=preprocessing_device)

    self.preemph = ops.PreemphasisFilter(device=preprocessing_device,
                                         preemph_coeff=preemph_coeff)

    self.spectrogram = ops.Spectrogram(device=preprocessing_device,
                                       nfft=nfft,
                                       window_length=window_size * sample_rate,
                                       window_step=window_stride * sample_rate)

    self.mel_fbank = ops.MelFilterBank(device=preprocessing_device,
                                       sample_rate=sample_rate,
                                       nfilter=self.nfeatures,
                                       normalize=True)

    self.log_features = ops.ToDecibels(device=preprocessing_device,
                                       multiplier=np.log(10),
                                       reference=1.0,
                                       cutoff_db=math.log(1e-20))

    self.get_shape = ops.Shapes(device=preprocessing_device)

    self.normalize = ops.Normalize(device=preprocessing_device, axes=[1])

    self.pad = ops.Pad(device=preprocessing_device, axes=[1], fill_value=0,
                       align=pad_align)

    # Silence trimming
    self.get_nonsilent_region = ops.NonsilentRegion(device="cpu",
                                                    cutoff_db=silence_threshold)
    self.trim_silence = ops.Slice(device="cpu",
                                  normalized_anchor=False,
                                  normalized_shape=False,
                                  axes=[0])
    self.to_float = ops.Cast(device="cpu", dtype=types.FLOAT)

    # Spectrogram masking
    self.spectrogram_cutouts = ops.ExternalSource(source=self._cutouts_generator,
                                                  num_outputs=2,
                                                  cycle=True)
    self.mask_spectrogram = ops.Erase(device=preprocessing_device,
                                      axes=[0, 1],
                                      fill_value=0,
                                      normalized_anchor=True)
def __init__(self, default_boxes, root, annFile, batch_size, mean, std,
             local_rank, num_workers, seed):
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=local_rank,
                                       num_threads=num_workers,
                                       seed=seed)

    # try:
    #     shard_id = torch.distributed.get_rank()
    #     num_shards = torch.distributed.get_world_size()
    # except RuntimeError:
    shard_id = 0
    num_shards = 1

    self.input = ops.COCOReader(file_root=root,
                                annotations_file=annFile,
                                skip_empty=True,
                                shard_id=shard_id,
                                num_shards=num_shards,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=False,
                                shuffle_after_epoch=True)
    self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    # Augmentation techniques
    # expand 1~2
    self.paste_ratio = ops.Uniform(range=[1, 2])
    self.paste_pos = ops.Uniform(range=[0, 1])
    self.paste = ops.Paste(device="gpu", fill_value=tuple(mean))
    self.bbpaste = ops.BBoxPaste(device="cpu", ltrb=True)

    # random crop
    self.crop = ops.RandomBBoxCrop(device="cpu",
                                   aspect_ratio=[0.5, 2.0],
                                   thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
                                   scaling=[0.3, 1.0],
                                   ltrb=True,
                                   allow_no_crop=True,
                                   num_attempts=50)
    self.slice = ops.Slice(device="gpu")

    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=320, resize_y=320,
                             min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(320, 320),
                                             mean=mean,
                                             std=std,
                                             mirror=0,
                                             output_dtype=types.FLOAT,
                                             output_layout=types.NCHW,
                                             pad_output=False)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])

    self.flip = ops.Flip(device="gpu")
    self.bbflip = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_coin = ops.CoinFlip(probability=0.5)

    self.box_encoder = ops.BoxEncoder(device="cpu", criteria=0.5,
                                      anchors=default_boxes.as_ltrb_list())
def __init__(self, args, device_id, file_root, annotations_file):
    super(DetectionPipeline, self).__init__(batch_size=args.batch_size,
                                            num_threads=args.num_workers,
                                            device_id=device_id,
                                            prefetch_queue_depth=args.prefetch,
                                            seed=args.seed)

    # Reading COCO dataset
    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=device_id,
                                num_shards=args.num_gpus,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=True)

    self.decode_cpu = ops.HostDecoder(device="cpu", output_type=types.RGB)
    self.decode_crop = ops.HostDecoderSlice(device="cpu", output_type=types.RGB)
    self.decode_gpu = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
    self.decode_gpu_crop = ops.nvJPEGDecoderSlice(device="mixed", output_type=types.RGB)

    self.ssd_crop = ops.SSDRandomCrop(device="cpu", num_attempts=1, seed=args.seed)
    self.random_bbox_crop = ops.RandomBBoxCrop(device="cpu",
                                               aspect_ratio=[0.5, 2.0],
                                               thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                               scaling=[0.3, 1.0],
                                               ltrb=True,
                                               seed=args.seed)

    self.slice_cpu = ops.Slice(device="cpu")
    self.slice_gpu = ops.Slice(device="gpu")

    self.resize_cpu = ops.Resize(device="cpu", resize_x=300, resize_y=300,
                                 min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
    self.resize_gpu = ops.Resize(device="gpu", resize_x=300, resize_y=300,
                                 min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
    crop_size = (300, 300)
    self.normalize_cpu = ops.CropMirrorNormalize(device="cpu", crop=crop_size,
                                                 mean=mean, std=std, mirror=0,
                                                 output_dtype=types.FLOAT)
    self.normalize_gpu = ops.CropMirrorNormalize(device="gpu", crop=crop_size,
                                                 mean=mean, std=std, mirror=0,
                                                 output_dtype=types.FLOAT)

    self.twist_cpu = ops.ColorTwist(device="cpu")
    self.twist_gpu = ops.ColorTwist(device="gpu")

    self.flip_cpu = ops.Flip(device="cpu")
    self.bbox_flip_cpu = ops.BbFlip(device="cpu", ltrb=True)
    self.flip_gpu = ops.Flip(device="gpu")
    self.bbox_flip_gpu = ops.BbFlip(device="gpu", ltrb=True)

    default_boxes = coco_anchors()
    self.box_encoder_cpu = ops.BoxEncoder(device="cpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_gpu = ops.BoxEncoder(device="gpu", criteria=0.5, anchors=default_boxes)
    self.box_encoder_cpu_offsets = ops.BoxEncoder(device="cpu", criteria=0.5, offset=True,
                                                  scale=2, stds=[0.1, 0.1, 0.2, 0.2],
                                                  anchors=default_boxes)
    self.box_encoder_gpu_offsets = ops.BoxEncoder(device="gpu", criteria=0.5, offset=True,
                                                  scale=2, stds=[0.1, 0.1, 0.2, 0.2],
                                                  anchors=default_boxes)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, batch_size, device_id, file_root, annotations_file, num_gpus,
             output_fp16=False, output_nhwc=False, pad_output=False,
             num_threads=1, seed=15, dali_cache=-1, dali_async=True,
             use_nvjpeg=False, use_roi=False):
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed,
                                       exec_pipelined=dali_async,
                                       exec_async=dali_async)
    self.use_roi = use_roi
    self.use_nvjpeg = use_nvjpeg

    try:
        shard_id = torch.distributed.get_rank()
    except RuntimeError:
        shard_id = 0

    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=shard_id,
                                num_shards=num_gpus,
                                ratio=True,
                                ltrb=True,
                                skip_empty=True,
                                random_shuffle=(dali_cache > 0),
                                stick_to_shard=(dali_cache > 0),
                                shuffle_after_epoch=(dali_cache <= 0))

    if use_nvjpeg:
        if use_roi:
            self.decode = ops.nvJPEGDecoderSlice(device="mixed", output_type=types.RGB)
            self.slice = None  # handled in the ROI decoder
        else:
            if dali_cache > 0:
                self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB,
                                                cache_size=dali_cache * 1024,
                                                cache_type="threshold",
                                                cache_threshold=10000)
            else:
                self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
            self.slice = ops.Slice(device="gpu")
        self.crop = ops.RandomBBoxCrop(device="cpu",
                                       aspect_ratio=[0.5, 2.0],
                                       thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                       scaling=[0.3, 1.0],
                                       ltrb=True,
                                       allow_no_crop=True,
                                       num_attempts=1)
    else:
        self.decode = ops.HostDecoder(device="cpu", output_type=types.RGB)
        self.slice = None  # handled in the cropper
        self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)

    # Augmentation techniques (in addition to random crop)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300,
                             min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
    output_layout = types.NHWC if output_nhwc else types.NCHW

    mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
    std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(300, 300),
                                             mean=mean_val,
                                             std=std_val,
                                             mirror=0,
                                             output_dtype=output_dtype,
                                             output_layout=output_layout,
                                             pad_output=pad_output)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
def __init__(self, batch_size, device_id, file_root, meta_files_path, annotations_file,
             num_gpus, anchors_ltrb_list, output_fp16=False, output_nhwc=False,
             pad_output=False, num_threads=1, seed=15, dali_cache=-1,
             dali_async=True, use_nvjpeg=False):
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed,
                                       exec_pipelined=dali_async,
                                       exec_async=dali_async)
    self.use_nvjpeg = use_nvjpeg

    try:
        shard_id = torch.distributed.get_rank()
    # Note: <= 19.05 was a RuntimeError, 19.06 is now throwing AssertionError
    except (RuntimeError, AssertionError):
        shard_id = 0

    if meta_files_path is None:
        self.c_input = ops.COCOReader(file_root=file_root,
                                      annotations_file=annotations_file,
                                      shard_id=shard_id,
                                      num_shards=num_gpus,
                                      ratio=True,
                                      ltrb=True,
                                      skip_empty=True,
                                      random_shuffle=(dali_cache > 0),
                                      stick_to_shard=(dali_cache > 0),
                                      lazy_init=True,
                                      shuffle_after_epoch=(dali_cache <= 0))
    else:
        self.c_input = ops.COCOReader(file_root=file_root,
                                      meta_files_path=meta_files_path,
                                      shard_id=shard_id,
                                      num_shards=num_gpus,
                                      random_shuffle=(dali_cache > 0),
                                      stick_to_shard=(dali_cache > 0),
                                      lazy_init=True,
                                      shuffle_after_epoch=(dali_cache <= 0))

    self.c_crop = ops.RandomBBoxCrop(device="cpu",
                                     aspect_ratio=[0.5, 2.0],
                                     thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                     scaling=[0.3, 1.0],
                                     ltrb=True,
                                     allow_no_crop=True,
                                     num_attempts=1)

    decoder_device = 'mixed' if use_nvjpeg else 'cpu'
    # Fused decode and slice: this is "region-of-interest" (ROI) decoding.
    self.m_decode = ops.ImageDecoderSlice(device=decoder_device, output_type=types.RGB)
    self.g_slice = None
    # Special case for DALI decode caching: the caching decoder can't be fused with
    # slicing (because we need to slice the decoded image differently every epoch),
    # so we need to unfuse decode and slice:
    if dali_cache > 0 and use_nvjpeg:
        self.m_decode = ops.ImageDecoder(device='mixed', output_type=types.RGB,
                                         cache_size=dali_cache * 1024,
                                         cache_type="threshold",
                                         cache_threshold=10000)
        self.g_slice = ops.Slice(device="gpu")

    # Augmentation techniques (in addition to random crop)
    self.g_twist = ops.ColorTwist(device="gpu")
    self.g_resize = ops.Resize(device="gpu", resize_x=300, resize_y=300,
                               min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
    output_layout = types.NHWC if output_nhwc else types.NCHW

    mean_val = list(np.array([0.485, 0.456, 0.406]) * 255.)
    std_val = list(np.array([0.229, 0.224, 0.225]) * 255.)
    self.g_normalize = ops.CropMirrorNormalize(device="gpu",
                                               crop=(300, 300),
                                               mean=mean_val,
                                               std=std_val,
                                               output_dtype=output_dtype,
                                               output_layout=output_layout,
                                               pad_output=pad_output)

    # Random variables
    self.c_rng1 = ops.Uniform(range=[0.5, 1.5])
    self.c_rng2 = ops.Uniform(range=[0.875, 1.125])
    self.c_rng3 = ops.Uniform(range=[-0.5, 0.5])

    flip_probability = 0.5
    self.c_flip_coin = ops.CoinFlip(probability=flip_probability)  # coin_rnd
    self.c_bbflip = ops.BbFlip(device="cpu", ltrb=True)

    self.g_box_encoder = ops.BoxEncoder(device="gpu",
                                        criteria=0.5,
                                        anchors=anchors_ltrb_list,
                                        offset=True,
                                        stds=[0.1, 0.1, 0.2, 0.2],
                                        scale=300)

    self.g_cast = ops.Cast(device="gpu", dtype=types.FLOAT)
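# Hedged sketch (not from the original code): how the fused ROI decoder and the
# unfused decode-then-slice fallback above are typically selected at graph-build time.
# The fragment name and variable names are assumptions for illustration.
def _decode_and_crop_fragment(self, encoded_images, crop_begin, crop_size):
    if self.g_slice is None:
        # Fused path: ImageDecoderSlice consumes the crop window directly.
        return self.m_decode(encoded_images, crop_begin, crop_size)
    # Caching path: decode the whole image once, then slice on the GPU.
    images = self.m_decode(encoded_images)
    return self.g_slice(images, crop_begin, crop_size)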
def __init__(self, *, pipeline_type, device_id, num_threads, batch_size,
             file_root: str, sampler, sample_rate, resample_range: list,
             window_size, window_stride, nfeatures, nfft, dither_coeff,
             silence_threshold, preemph_coeff, max_duration,
             preprocessing_device="gpu"):
    super().__init__(batch_size, num_threads, device_id)
    self._dali_init_log(locals())

    if torch.distributed.is_initialized():
        shard_id = torch.distributed.get_rank()
        n_shards = torch.distributed.get_world_size()
    else:
        shard_id = 0
        n_shards = 1

    self.preprocessing_device = preprocessing_device.lower()
    assert self.preprocessing_device == "cpu" or self.preprocessing_device == "gpu", \
        "Incorrect preprocessing device. Please choose either 'cpu' or 'gpu'"

    self.resample_range = resample_range

    train_pipeline = pipeline_type == 'train'
    self.train = train_pipeline
    self.sample_rate = sample_rate
    self.dither_coeff = dither_coeff
    self.nfeatures = nfeatures
    self.max_duration = max_duration
    self.do_remove_silence = True if silence_threshold is not None else False

    shuffle = train_pipeline and not sampler.is_sampler_random()
    self.read = ops.FileReader(name="Reader",
                               pad_last_batch=(pipeline_type == 'val'),
                               device="cpu",
                               file_root=file_root,
                               file_list=sampler.get_file_list_path(),
                               shard_id=shard_id,
                               num_shards=n_shards,
                               shuffle_after_epoch=shuffle)

    # TODO change ExternalSource to Uniform for new DALI release
    if resample_range is not None:
        self.speed_perturbation_coeffs = ops.Uniform(device="cpu", range=resample_range)
    else:
        self.speed_perturbation_coeffs = None

    self.decode = ops.AudioDecoder(
        device="cpu",
        sample_rate=self.sample_rate if resample_range is None else None,
        dtype=types.FLOAT,
        downmix=True)

    self.normal_distribution = ops.NormalDistribution(device=preprocessing_device)

    self.preemph = ops.PreemphasisFilter(device=preprocessing_device,
                                         preemph_coeff=preemph_coeff)

    self.spectrogram = ops.Spectrogram(device=preprocessing_device,
                                       nfft=nfft,
                                       window_length=window_size * sample_rate,
                                       window_step=window_stride * sample_rate)

    self.mel_fbank = ops.MelFilterBank(device=preprocessing_device,
                                       sample_rate=sample_rate,
                                       nfilter=self.nfeatures,
                                       normalize=True)

    self.log_features = ops.ToDecibels(device=preprocessing_device,
                                       multiplier=np.log(10),
                                       reference=1.0,
                                       cutoff_db=math.log(1e-20))

    self.get_shape = ops.Shapes(device=preprocessing_device)

    self.normalize = ops.Normalize(device=preprocessing_device, axes=[1])

    self.pad = ops.Pad(device=preprocessing_device, fill_value=0)

    # Silence trimming
    self.get_nonsilent_region = ops.NonsilentRegion(device="cpu",
                                                    cutoff_db=silence_threshold)
    self.trim_silence = ops.Slice(device="cpu",
                                  normalized_anchor=False,
                                  normalized_shape=False,
                                  axes=[0])
    self.to_float = ops.Cast(device="cpu", dtype=types.FLOAT)
def __init__(self, batch_size, device_id, file_root, annotations_file, num_gpus,
             output_fp16=False, output_nhwc=False, pad_output=False,
             num_threads=1, seed=15):
    super(COCOPipeline, self).__init__(batch_size=batch_size,
                                       device_id=device_id,
                                       num_threads=num_threads,
                                       seed=seed)

    if torch.distributed.is_initialized():
        shard_id = torch.distributed.get_rank()
    else:
        shard_id = 0

    self.input = ops.COCOReader(file_root=file_root,
                                annotations_file=annotations_file,
                                shard_id=shard_id,
                                num_shards=num_gpus,
                                ratio=True,
                                ltrb=True,
                                random_shuffle=True,
                                skip_empty=True)
    self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

    # Augmentation techniques
    self.rotate = ops.Rotate(device="gpu", angle=30,
                             interp_type=types.INTERP_LINEAR, fill_value=0)
    self.crop = ops.SSDRandomCrop(device="cpu", num_attempts=1)
    self.twist = ops.ColorTwist(device="gpu")
    self.resize = ops.Resize(device="gpu", resize_x=300, resize_y=300)

    # Will flip with probability provided in CoinFlip
    self.flip = ops.Flip(device='gpu')
    self.coin_flip_v = ops.CoinFlip(probability=0.1)
    self.coin_flip_h = ops.CoinFlip(probability=0.1)

    # bbox flipping
    self.bbflip = ops.BbFlip(device='gpu', ltrb=True)

    # paste
    self.paste = ops.Paste(device='gpu', fill_value=0)
    self.paste_pos = ops.Uniform(range=(0, 1))
    self.paste_ratio = ops.Uniform(range=(1, 2))
    self.bbpaste = ops.BBoxPaste(device='cpu', ltrb=True)

    # prospective crop
    self.prospective_crop = ops.RandomBBoxCrop(device='cpu',
                                               aspect_ratio=[0.5, 2.0],
                                               thresholds=[0.1, 0.3, 0.5],
                                               scaling=[0.8, 1.0],
                                               ltrb=True)

    # slice (after prospective crop)
    self.slice = ops.Slice(device='gpu')

    # color and geometric distortions
    self.water = ops.Water(device='gpu')
    # self.contrast = ops.BrightnessContrast(device="gpu", brightness=0.5, contrast=1.5)
    # self.hsv = ops.Hsv(device="gpu", hue=45., saturation=0.2)
    self.sphere = ops.Sphere(device='gpu')
    self.warpaffine = ops.WarpAffine(device="gpu",
                                     matrix=[1.0, 0.8, 0.0, 0.0, 1.2, 0.0],
                                     interp_type=types.INTERP_LINEAR)

    output_dtype = types.FLOAT16 if output_fp16 else types.FLOAT
    output_layout = types.NHWC if output_nhwc else types.NCHW

    self.normalize = ops.CropMirrorNormalize(device="gpu",
                                             crop=(300, 300),
                                             mean=[0.0, 0.0, 0.0],
                                             std=[255.0, 255.0, 255.0],
                                             mirror=0,
                                             output_dtype=output_dtype,
                                             output_layout=output_layout,
                                             pad_output=pad_output)

    # Random variables
    self.rng1 = ops.Uniform(range=[0.5, 1.5])
    self.rng2 = ops.Uniform(range=[0.875, 1.125])
    self.rng3 = ops.Uniform(range=[-0.5, 0.5])
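# Hedged usage sketch (not from the original code): building and running one of the
# pipelines defined above with the legacy ops API. The dataset paths and batch size
# are placeholders/assumptions.
pipe = COCOPipeline(batch_size=32, device_id=0,
                    file_root="/data/coco/train2017",
                    annotations_file="/data/coco/annotations/instances_train2017.json",
                    num_gpus=1)
pipe.build()
outputs = pipe.run()  # returns one batch of the values listed in define_graph()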