Exemplo n.º 1
0
def get_pipeline(folder="train", custom_reader=None, *,
                 batch_size=64, num_threads=1, device_id=1):
    """Build a DALI image-augmentation pipeline and return an iterator over it.

    Args:
        folder: Root directory given to ``fn.file_reader`` when
            ``custom_reader`` is not supplied.
        custom_reader: Optional ``(raw_files, labels)`` pair that replaces the
            built-in file reader.
        batch_size: Batch size passed to ``Pipeline``. Defaults to 64.
        num_threads: Thread count passed to ``Pipeline``. Defaults to 1.
        device_id: Device id passed to ``Pipeline``. Defaults to 1.

    Returns:
        A ``DALIClassificationIterator`` wrapping the built pipeline.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads,
                    device_id=device_id)

    if custom_reader:
        raw_files, labels = custom_reader
    else:
        raw_files, labels = fn.file_reader(file_root=str(folder),
                                           random_shuffle=True)

    # NOTE(review): images are decoded as GRAY, yet resize is given
    # image_type=RGB and the result is fed to the HSV operator — confirm this
    # combination is intended for the installed DALI version.
    decode = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
    resize = fn.resize(decode, device="gpu", image_type=types.RGB,
                       interp_type=types.INTERP_LINEAR, resize_x=WIDTH, resize_y=HEIGHT)

    # Random colour jitter; every parameter is drawn by its own uniform operator.
    hsv = fn.hsv(resize, hue=fn.uniform(range=(-10, 10)), saturation=fn.uniform(range=(-.5, .5)),
                 value=fn.uniform(range=(0.9, 1.2)), device="gpu", dtype=types.UINT8)
    bc = fn.brightness_contrast(hsv, device="gpu", brightness=fn.uniform(range=(.9, 1.1)))

    # Convert to float and normalize with mean = std = 127 (255 // 2).
    cmn = fn.crop_mirror_normalize(bc, device="gpu", output_dtype=types.FLOAT,
                                   output_layout=types.NHWC,
                                   image_type=types.GRAY,
                                   mean=[255 // 2],
                                   std=[255 // 2])

    # Random rotation within +/-40; keep_size=True is passed so the operator
    # is asked to retain the input size.
    rot = fn.rotate(cmn, angle=fn.uniform(range=(-40, 40)), device="gpu", keep_size=True)

    tpose = fn.transpose(rot, perm=(2, 0, 1), device="gpu")  # Reshaping to a format PyTorch likes

    pipe.set_outputs(tpose, labels)
    pipe.build()

    # The second positional argument (-1) is forwarded to the iterator unchanged.
    dali_iter = DALIClassificationIterator([pipe], -1)

    return dali_iter
Exemplo n.º 2
0
def create_coco_pipeline(file_root,
                         annotations_file,
                         batch_size=1,
                         device_id=0,
                         num_threads=4,
                         local_rank=0,
                         world_size=1):
    """Assemble a COCO detection pipeline with random crop and colour jitter.

    Args:
        file_root: Image directory handed to the COCO reader.
        annotations_file: Annotation file handed to the COCO reader.
        batch_size: First positional argument of ``Pipeline``.
        device_id: Only used as an offset for the pipeline seed (``42 + device_id``).
        num_threads: Second positional argument of ``Pipeline``.
        local_rank: Third positional argument of ``Pipeline`` and the reader's
            ``shard_id``.
        world_size: The reader's ``num_shards``.

    Returns:
        The ``Pipeline`` with outputs ``(images, bboxes, labels)`` set; it is
        not built here.
    """
    # NOTE(review): local_rank (not device_id) is what reaches Pipeline's third
    # positional parameter — confirm that is the intended device selection.
    pipeline = Pipeline(batch_size, num_threads, local_rank, seed=42 + device_id)

    with pipeline:
        encoded, boxes, classes = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            skip_empty=True,
            shard_id=local_rank,
            num_shards=world_size,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
            name="Reader",
        )

        # Pick a random crop window and get the boxes/labels adjusted to it.
        window_start, window_shape, boxes, classes = fn.random_bbox_crop(
            boxes,
            classes,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50,
        )

        # Decode only the selected window.
        decoded = fn.image_decoder_slice(
            encoded, window_start, window_shape,
            device="mixed", output_type=types.RGB)

        # One coin per sample, shared by the box flip and the image mirror below.
        mirror_flag = fn.coin_flip(probability=0.5)

        resized = fn.resize(
            decoded,
            resize_x=300,
            resize_y=300,
            min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # use float to avoid clipping and quantizing the intermediate result
        hue_shift = fn.uniform(range=[-0.5, 0.5])
        saturation_gain = fn.uniform(range=[0.5, 1.5])
        jittered = fn.hsv(resized, dtype=types.FLOAT,
                          hue=hue_shift, saturation=saturation_gain)

        brightness_gain = fn.uniform(range=[0.875, 1.125])
        contrast_gain = fn.uniform(range=[0.5, 1.5])
        adjusted = fn.brightness_contrast(
            jittered,
            contrast_center=128,  # input is in float, but in 0..255 range
            dtype=types.UINT8,
            brightness=brightness_gain,
            contrast=contrast_gain)

        flipped_boxes = fn.bb_flip(boxes, ltrb=True, horizontal=mirror_flag)
        normalized = fn.crop_mirror_normalize(
            adjusted,
            mean=[104., 117., 123.],
            std=[1., 1., 1.],
            mirror=mirror_flag,
            dtype=types.FLOAT,
            output_layout="CHW",
            pad_output=False)

        pipeline.set_outputs(normalized, flipped_boxes, classes)

    return pipeline
Exemplo n.º 3
0
def create_coco_pipeline(default_boxes, args):
    """Define the COCO training graph with SSD-style box encoding.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` result is used as the
            anchors for ``fn.box_encoder``.
        args: Options object; reads ``train_coco_root``, ``train_annotate``
            and ``fp16``.

    Returns:
        Tuple ``(images, bboxes, labels)`` of DALI graph nodes; ``bboxes`` and
        ``labels`` are transferred with ``.gpu()`` before being returned.
    """
    # Test the process-group state explicitly instead of catching an exception:
    # the exception type raised by get_rank() on an uninitialised group is not
    # the same across PyTorch releases, so ``except RuntimeError`` alone can
    # let the failure escape in a plain single-process run.
    dist = torch.distributed
    if dist.is_available() and dist.is_initialized():
        shard_id = dist.get_rank()
        num_shards = dist.get_world_size()
    else:
        shard_id = 0
        num_shards = 1

    images, bboxes, labels = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader")

    # Pick a random crop window and get the boxes/labels adjusted to it.
    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes,
        labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50)
    # Decode only the selected window.
    images = fn.image_decoder_slice(images,
                                    crop_begin,
                                    crop_size,
                                    device="mixed",
                                    output_type=types.RGB)
    # One coin per sample, shared by the box flip and the image mirror below.
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(images,
                       resize_x=300,
                       resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    # Per-sample colour-jitter parameters.
    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    # use float to avoid clipping and quantizing the intermediate result
    images = fn.hsv(images, dtype=types.FLOAT, hue=hue,
                    saturation=saturation)
    images = fn.brightness_contrast(
        images,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast)

    # Output precision follows the fp16 option.
    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(
        images,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=dtype,
        output_layout="CHW",
        pad_output=False)

    # Encode the boxes/labels against the supplied anchors.
    bboxes, labels = fn.box_encoder(bboxes,
                                    labels,
                                    criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    labels = labels.gpu()
    bboxes = bboxes.gpu()

    return images, bboxes, labels
Exemplo n.º 4
0
def train_pipeline(cfg: TrainLoaderConfig):
    """Define the training graph: read, random-crop decode, augment, normalize.

    Args:
        cfg: Loader configuration. Reads ``min_area``, ``image_size``,
            ``random_interpolation``, ``blur_prob``, ``color_twist_prob``,
            ``gray_prob``, ``re_prob``, ``re_count`` and ``num_classes``.

    Returns:
        Tuple ``(image, label)`` of DALI graph nodes: the normalized image and
        the one-hot label transferred with ``.gpu()``.
    """
    jpeg, label = fn.readers.file(
        file_root=ROOT_DATA_DIR + "/train/",
        random_shuffle=True,
        shard_id=env_rank(),
        num_shards=env_world_size(),
        name="Reader",
    )
    image = fn.decoders.image_random_crop(
        jpeg,
        device="mixed",
        random_aspect_ratio=[0.75, 1.25],
        random_area=[cfg.min_area, 1.0],
        num_attempts=100,
        output_type=types.RGB,
    )

    # Resize with triangular interpolation; optionally pick between it and a
    # cubic resize per sample. ``mix`` is defined elsewhere — presumably it
    # selects its second argument where the coin is set; verify against it.
    image_tr = fn.resize(image,
                         device="gpu",
                         size=cfg.image_size,
                         interp_type=types.INTERP_TRIANGULAR)
    if cfg.random_interpolation:
        image_cub = fn.resize(image,
                              device="gpu",
                              size=cfg.image_size,
                              interp_type=types.INTERP_CUBIC)
        image = mix(fn.random.coin_flip(probability=0.5), image_cub, image_tr)
    else:
        image = image_tr

    if cfg.blur_prob > 0:
        blur_image = fn.gaussian_blur(
            image,
            device="gpu",
            window_size=11,
            sigma=fn.random.uniform(range=[0.5, 1.1]))
        image = mix(
            fn.random.coin_flip(probability=cfg.blur_prob, dtype=types.BOOL),
            blur_image, image)

    if cfg.color_twist_prob > 0:
        image_ct = fn.color_twist(
            image,
            device="gpu",
            contrast=fn.random.uniform(range=[0.7, 1.3]),
            brightness=fn.random.uniform(range=[0.7, 1.3]),
            hue=fn.random.uniform(range=[-20, 20]),  # in degrees
            saturation=fn.random.uniform(range=[0.7, 1.3]),
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.color_twist_prob,
                                dtype=types.BOOL), image_ct, image)

    if cfg.gray_prob > 0:
        # The coin is used directly as the saturation multiplier, so a value
        # of 0 produces a grayscale image and 1 leaves colour untouched.
        # coin_flip yields 1 with the given probability, hence flip with
        # 1 - gray_prob so that desaturation happens with probability gray_prob
        # (the previous code desaturated with probability 1 - gray_prob).
        keep_color = fn.cast(
            fn.random.coin_flip(probability=1 - cfg.gray_prob),
            dtype=types.FLOAT)
        image = fn.hsv(image, device="gpu", saturation=keep_color)

    if cfg.re_prob:  # random erasing
        # re_count * 2 random values are drawn for anchors and for shapes
        # (axis_names="HW", so two values per erased region).
        image_re = fn.erase(
            image,
            device="gpu",
            anchor=fn.random.uniform(range=(0.0, 1), shape=cfg.re_count * 2),
            shape=fn.random.uniform(range=(0.05, 0.25),
                                    shape=cfg.re_count * 2),
            axis_names="HW",
            fill_value=DATA_MEAN,
            normalized_anchor=True,
            normalized_shape=True,
        )
        image = mix(
            fn.random.coin_flip(probability=cfg.re_prob, dtype=types.BOOL),
            image_re, image)

    # Final crop, random horizontal mirror and mean/std normalization.
    image = fn.crop_mirror_normalize(
        image,
        device="gpu",
        crop=(cfg.image_size, cfg.image_size),
        mirror=fn.random.coin_flip(probability=0.5),
        mean=DATA_MEAN,
        std=DATA_STD,
        dtype=types.FLOAT,
        output_layout=types.NCHW,
    )
    label = fn.one_hot(label, num_classes=cfg.num_classes).gpu()
    return image, label