Пример #1
0
def get_data(name, batch):
    isTrain = name == 'train'

    if isTrain:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.49),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
Пример #2
0
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Пример #3
0
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.Flip(horiz=True),
            imgaug.ToFloat32(),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_LINEAR),
            imgaug.CenterCrop((224, 224)),
            imgaug.ToFloat32(),
        ]
    return augmentors
Пример #4
0
def get_data(name, batch):
    isTrain = name == 'train'
    image_shape = 224

    if isTrain:
        augmentors = [
            # use lighter augs if model is too small
            GoogleNetResize(
                crop_area_fraction=0.49 if args.width_ratio < 1 else 0.08,
                target_shape=image_shape),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(int(image_shape * 256 / 224),
                                      cv2.INTER_CUBIC),
            imgaug.CenterCrop((image_shape, image_shape)),
        ]
    return get_imagenet_dataflow(args.data_dir,
                                 name,
                                 batch,
                                 augmentors,
                                 meta_dir=args.meta_dir)
Пример #5
0
def get_basic_augmentor(isTrain):
    interpolation = cv2.INTER_LINEAR
    if isTrain:
        augmentors = [
            TorchvisionCropAndResize(),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Пример #6
0
def preprocess(img, image_size):
    augmentors = [
        imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
        imgaug.CenterCrop((224, 224)),
    ]

    if image_size != 224:
        augmentors.append(
            imgaug.ResizeShortestEdge(image_size, cv2.INTER_CUBIC))

    for a in augmentors:
        img = a.augment(img)

    return img
Пример #7
0
 def _augment(self, img, _):
     h, w = img.shape[:2]
     area = h * w
     for _ in range(10):
         targetArea = self.rng.uniform(self.crop_area_fraction, 1.0) * area
         aspectR = self.rng.uniform(self.aspect_ratio_low, self.aspect_ratio_high)
         ww = int(np.sqrt(targetArea * aspectR) + 0.5)
         hh = int(np.sqrt(targetArea / aspectR) + 0.5)
         if self.rng.uniform() < 0.5:
             ww, hh = hh, ww
         if hh <= h and ww <= w:
             x1 = 0 if w == ww else self.rng.randint(0, w - ww)
             y1 = 0 if h == hh else self.rng.randint(0, h - hh)
             out = img[y1:y1 + hh, x1:x1 + ww]
             out = cv2.resize(out, (self.target_shape, self.target_shape), interpolation=cv2.INTER_CUBIC)
             return out
     out = imgaug.ResizeShortestEdge(self.target_shape, interp=cv2.INTER_CUBIC).augment(img)
     out = imgaug.CenterCrop(self.target_shape).augment(out)
     return out
Пример #8
0
def get_data(name, batch):
    isTrain = name == 'train'
    if isTrain:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.RandomCrop(224),
            imgaug.Lighting(0.1,
                            eigval=np.asarray(
                                [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                            eigvec=np.array(
                                [[-0.5675, 0.7192, 0.4009],
                                 [-0.5808, -0.0045, -0.8140],
                                 [-0.5836, -0.6948, 0.4203]],
                                dtype='float32')[::-1, ::-1]),
            imgaug.Flip(horiz=True)]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224))]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
Пример #9
0
def get_data(is_train,
             batch_size,
             data_dir_path,
             input_image_size=224,
             resize_inv_factor=0.875):
    assert (resize_inv_factor > 0.0)
    resize_value = int(math.ceil(float(input_image_size) / resize_inv_factor))

    if is_train:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.08,
                            target_shape=input_image_size),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True)
        ]
    else:
        augmentors = [
            # imgaug.ResizeShortestEdge(resize_value, cv2.INTER_CUBIC),
            imgaug.ResizeShortestEdge(resize_value, cv2.INTER_LINEAR),
            imgaug.CenterCrop((input_image_size, input_image_size))
        ]

    return get_imagenet_dataflow(datadir=data_dir_path,
                                 is_train=is_train,
                                 batch_size=batch_size,
                                 augmentors=augmentors)
Пример #10
0
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    interpolation = cv2.INTER_CUBIC
    # linear seems to have more stable performance.
    # but we keep cubic for compatibility with old models
    if isTrain:
        augmentors = [
            imgaug.GoogleNetRandomCropAndResize(interp=interpolation),
            imgaug.ToFloat32(),  # avoid frequent casting in each color augmentation
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4)),
                 imgaug.Contrast((0.6, 1.4), rgb=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.ToUint8(),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Пример #11
0
def get_inat_augmented_data(subset,
                            options,
                            lmdb_dir=None,
                            year='2018',
                            do_multiprocess=True,
                            do_validation=False,
                            is_train=None,
                            shuffle=None,
                            n_allow=None):
    input_size = options.input_size if options.input_size else 224
    isTrain = is_train if is_train is not None else (subset == 'train'
                                                     and do_multiprocess)
    shuffle = shuffle if shuffle is not None else isTrain
    postfix = "" if n_allow is None else "_allow_{}".format(n_allow)

    #TODO: Parameterize the cv split to be consider
    #Currently hardcoding to 1
    cv = 1

    # When do_validation is True it will expect *cv_train and *cv_val lmdbs
    # Currently the cv_train split is always used
    if isTrain:
        postfix += '_cv_train_{}'.format(cv)
    elif do_validation:
        subset = 'train'
        postfix += '_cv_val_{}'.format(cv)

    if lmdb_dir == None:
        lmdb_path = os.path.join(options.data_dir, 'inat_lmdb',
                                 'inat2018_{}{}.lmdb'.format(subset, postfix))
    else:
        lmdb_path = os.path.join(
            options.data_dir, lmdb_dir,
            'inat{}_{}{}.lmdb'.format(year, subset, postfix))

    ds = LMDBData(lmdb_path, shuffle=False)
    if shuffle:
        ds = LocallyShuffleData(ds,
                                1024 * 80)  # This is 64G~80G in memory images
    ds = PrefetchData(ds, 1024 * 8, 1)  # prefetch around 8 G
    ds = LMDBDataPoint(ds)
    ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR),
                          0)  # BGR uint8 data
    if isTrain:

        class Resize(imgaug.ImageAugmentor):
            """
            crop 8%~100% of the original image
            See `Going Deeper with Convolutions` by Google.
            """
            def _augment(self, img, _):
                h, w = img.shape[:2]
                area = h * w
                for _ in range(10):
                    targetArea = self.rng.uniform(0.08, 1.0) * area
                    aspectR = self.rng.uniform(0.75, 1.333)
                    ww = int(np.sqrt(targetArea * aspectR))
                    hh = int(np.sqrt(targetArea / aspectR))
                    if self.rng.uniform() < 0.5:
                        ww, hh = hh, ww
                    if hh <= h and ww <= w:
                        x1 = 0 if w == ww else self.rng.randint(0, w - ww)
                        y1 = 0 if h == hh else self.rng.randint(0, h - hh)
                        out = img[y1:y1 + hh, x1:x1 + ww]
                        out = cv2.resize(out, (input_size, input_size),
                                         interpolation=cv2.INTER_CUBIC)
                        return out
                out = cv2.resize(img, (input_size, input_size),
                                 interpolation=cv2.INTER_CUBIC)
                return out

        augmentors = [
            Resize(),
            imgaug.RandomOrderAug([
                imgaug.Brightness(30, clip=False),
                imgaug.Contrast((0.8, 1.2), clip=False),
                imgaug.Saturation(0.4),
                # rgb-bgr conversion
                imgaug.Lighting(0.1,
                                eigval=[0.2175, 0.0188, 0.0045][::-1],
                                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                                 [-0.5808, -0.0045, -0.8140],
                                                 [-0.5836, -0.6948, 0.4203]],
                                                dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Clip(),
            imgaug.Flip(horiz=True),
            imgaug.ToUint8()
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256),
            imgaug.CenterCrop((input_size, input_size)),
            imgaug.ToUint8()
        ]
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    if do_multiprocess:
        ds = PrefetchDataZMQ(ds, min(24, multiprocessing.cpu_count()))
    ds = BatchData(ds,
                   options.batch_size // options.nr_gpu,
                   remainder=not isTrain)
    return ds
Пример #12
0
def inference_augmentor():
    return [
        imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
        imgaug.CenterCrop((224, 224))
    ]
Пример #13
0
def get_tiny_imagenet_augmented_data(subset, options,
        do_multiprocess=True, is_train=None, shuffle=None):
    isTrain = is_train if is_train is not None else (subset == 'train' and do_multiprocess)
    shuffle = shuffle if shuffle is not None else isTrain

    lmdb_path = os.path.join(options.data_dir,
        'tiny_imagenet_lmdb', 'tiny_imagenet_{}.lmdb'.format(subset))
    # since tiny imagenet is small (200MB zipped) we can shuffle all directly.
    # we skipped the LocallyShuffleData and PrefetchData routine.
    ds = LMDBData(lmdb_path, shuffle=shuffle)
    ds = LMDBDataPoint(ds)
    ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
    img_size = 64
    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            """
            crop 8%~100% of the original image
            See `Going Deeper with Convolutions` by Google.
            """
            def _augment(self, img, _):
                h, w = img.shape[:2]
                area = h * w
                for _ in range(10):
                    targetArea = self.rng.uniform(0.3, 1.0) * area
                    aspectR = self.rng.uniform(0.75, 1.333)
                    ww = int(np.sqrt(targetArea * aspectR))
                    hh = int(np.sqrt(targetArea / aspectR))
                    if self.rng.uniform() < 0.5:
                        ww, hh = hh, ww
                    if hh <= h and ww <= w:
                        x1 = 0 if w == ww else self.rng.randint(0, w - ww)
                        y1 = 0 if h == hh else self.rng.randint(0, h - hh)
                        out = img[y1:y1 + hh, x1:x1 + ww]
                        out = cv2.resize(out, (img_size, img_size), interpolation=cv2.INTER_CUBIC)
                        return out
                out = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_CUBIC)
                return out

        augmentors = [
            Resize(),
            imgaug.RandomOrderAug(
                [imgaug.Brightness(30, clip=False),
                 imgaug.Contrast((0.8, 1.2), clip=False),
                 imgaug.Saturation(0.4),
                 # rgb-bgr conversion
                 imgaug.Lighting(0.1,
                                 eigval=[0.2175, 0.0188, 0.0045][::-1],
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.Clip(),
            imgaug.Flip(horiz=True),
            imgaug.ToUint8()
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(72),
            imgaug.CenterCrop((img_size, img_size)),
            imgaug.ToUint8()
        ]
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    ds = BatchData(ds, options.batch_size // options.nr_gpu, remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, nr_prefetch=4, nr_proc=4)
    return ds
Пример #14
0
def get_input_imagenet():
    train = dataset.ILSVRC12("/datasets/ImageNet/ILSVRC/Data/CLS-LOC",
                             "train",
                             dir_structure="train",
                             shuffle=True)
    test = dataset.ILSVRC12("/datasets/ImageNet/ILSVRC/Data/CLS-LOC",
                            "val",
                            dir_structure="train",
                            shuffle=False)

    # Copied from tensorpack examples:
    # https://github.com/tensorpack/tensorpack/blob/master/examples/ImageNetModels/imagenet_utils.py

    train_augmentors = imgaug.AugmentorList([
        imgaug.GoogleNetRandomCropAndResize(interp=cv2.INTER_CUBIC),
        # It's OK to remove the following augs if your CPU is not fast enough.
        # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
        # Removing lighting leads to a tiny drop in accuracy.
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion for the constants copied from fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                eigvec=np.array(
                    [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140],
                     [-0.5836, -0.6948, 0.4203]],
                    dtype='float32')[::-1, ::-1])
        ]),
        imgaug.Flip(horiz=True),
    ])

    test_augmentors = imgaug.AugmentorList([
        imgaug.ResizeShortestEdge(256, interp=cv2.INTER_CUBIC),
        imgaug.CenterCrop((224, 224)),
    ])

    def preprocess(augmentors):
        def apply(x):
            image, label = x
            onehot = np.zeros(1000)
            onehot[label] = 1.0
            image = augmentors.augment(image)
            return image, onehot

        return apply

    parallel = min(40,
                   multiprocessing.cpu_count() // 2)  # assuming hyperthreading
    train = MapData(train, preprocess(train_augmentors))
    train = PrefetchDataZMQ(train, parallel)

    test = MultiThreadMapData(test,
                              parallel,
                              preprocess(test_augmentors),
                              strict=True)
    test = PrefetchDataZMQ(test, 1)

    return train, test, ((224, 224, 3), (1000, ))
Пример #15
0
def get_input_visualwakewords():
    # VISUALWAKEWORDS_CONFIG = {
    #     "train_instances": "/home/el/Datasets/COCO14/visualwakewords/instances_visualwakewords_train2014.json",
    #     "train_images": "/home/el/Datasets/COCO14/train2014",
    #     "val_instances": "/home/el/Datasets/COCO14/visualwakewords/instances_visualwakewords_val2014.json",
    #     "val_images": "/home/el/Datasets/COCO14/val2014",
    #     "filter_list": "/home/el/Datasets/COCO14/visualwakewords/mscoco_minival_ids.txt"
    # }

    target_side_size = 128
    train = VisualWakeWords(VISUALWAKEWORDS_CONFIG["train_instances"],
                            VISUALWAKEWORDS_CONFIG["train_images"],
                            shuffle=True)
    test = VisualWakeWords(VISUALWAKEWORDS_CONFIG["val_instances"],
                           VISUALWAKEWORDS_CONFIG["val_images"],
                           filter_list=VISUALWAKEWORDS_CONFIG["filter_list"],
                           shuffle=False)

    train_augmentors = imgaug.AugmentorList([
        imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.25, 1.),
                                            target_shape=target_side_size,
                                            interp=cv2.INTER_CUBIC),
        imgaug.Flip(horiz=True),
    ])

    autoaugment_policy = ImageNetPolicy()

    test_augmentors = imgaug.AugmentorList([
        imgaug.ResizeShortestEdge(int(target_side_size * 1.2),
                                  interp=cv2.INTER_CUBIC),
        imgaug.CenterCrop((target_side_size, target_side_size)),
    ])

    def preprocess(train):
        def apply(x):
            image, label = x
            onehot = np.zeros(2)
            onehot[label] = 1.0
            augmentors = train_augmentors if train else test_augmentors
            image = augmentors.augment(image)
            if train:
                image = np.array(autoaugment_policy(Image.fromarray(image)))
            return image, onehot
            # mean = [0.4767, 0.4488, 0.4074]
            # std = [0.2363, 0.2313, 0.2330]
            # return (image / 255.0 - mean) / std, onehot

        return apply

    parallel = min(18,
                   multiprocessing.cpu_count() // 2)  # assuming hyperthreading
    train = MapData(train, preprocess(train=True))
    train = PrefetchDataZMQ(train, parallel)

    test = MultiThreadMapData(test,
                              parallel,
                              preprocess(train=False),
                              strict=True)
    test = PrefetchDataZMQ(test, 1)

    return train, test, ((target_side_size, target_side_size, 3), (2, ))