def duplicate_with_noise(emojis, repeat=1000):
    """Return a shuffled, augmented and downscaled copy of an emoji dataset.

    The original samples are kept, and ``repeat`` randomly augmented copies
    of the complete image set are appended to them. Images and labels are
    then shuffled in unison and every image is resized to 28x28.

    Args:
        emojis: Mapping with an ``"images"`` entry (sequence of equally sized
            2-D arrays) and a parallel ``"labels"`` entry. It is deep-copied
            before anything is appended.
        repeat: Number of augmented copies of the full image set to append.

    Returns:
        A deep copy of ``emojis`` whose ``"images"`` and ``"labels"`` entries
        hold the shuffled samples, with all images resized to 28x28.
    """
    # ndarray.shape is (rows, cols) == (height, width); ImageAugmenter takes
    # the width first, so unpack in that order to stay correct for
    # non-square images.
    height, width = emojis["images"][0].shape
    # NOTE(review): how ImageAugmenter turns these scalars/tuples into random
    # ranges is defined by that class; the values here are small on purpose.
    augmenter = ImageAugmenter(
        width,
        height,  # must be the same for all images in the batch
        hflip=False,  # no horizontal flipping
        vflip=False,  # no vertical flipping
        scale_to_percent=(0.9, 1.1),
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=5,
        shear_deg=5,
        translation_x_px=2,
        translation_y_px=2
    )
    ret = copy.deepcopy(emojis)
    for i in range(repeat):
        print("Iter {0}".format(i))
        augmented_emojis = augmenter.augment_batch(
            np.array(emojis["images"], dtype=np.uint8))
        ret["images"] += list(augmented_emojis)
        ret["labels"] += emojis["labels"]

    # Shuffle images and labels together so the pairs stay aligned.
    images, labels = sklearn.utils.shuffle(ret["images"], ret["labels"])
    ret["labels"] = labels
    ret["images"] = [
        skimage.transform.resize(image, (28, 28), preserve_range=True)
        for image in images
    ]
    return ret
def augment_image(image):
    """Apply one random augmentation to ``image`` and return it as uint8.

    The image is inverted (``255 - image``), resized to the module-level
    ``img_size`` (unpacked as width, height), pushed through a freshly built
    ``ImageAugmenter`` as a batch of one, multiplied by 255, inverted back
    and cast to ``np.uint8``.
    """
    inverted = 255 - image
    target_width, target_height = img_size
    inverted = cv2.resize(inverted, (target_width, target_height))

    # NOTE(review): the range semantics of these values are defined by
    # ImageAugmenter; blur and noise are switched off by passing 0.
    settings = dict(
        hflip=False,
        vflip=False,
        scale_to_percent=(0.9, 1.05),
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=2,
        shear_deg=5,
        translation_x_px=8,
        translation_y_px=2,
        blur_radius=0,
        noise_variance=0,
        motion_blur_radius=0,
        motion_blur_strength=0,
    )
    augmenter = ImageAugmenter(target_width, target_height, **settings)

    single_batch = np.array([inverted], dtype=np.uint8)
    augmented = augmenter.augment_batch(single_batch)[0]
    augmented *= 255
    restored = 255 - augmented
    return restored.astype(np.uint8)
示例#3
0
    def __init__(self, logfile, data_3d=True):
        """Load the train/dev density data, balance classes and set up an augmenter.

        Args:
            logfile: Passed through to ``print_all_label_statistics``.
            data_3d: Forwarded to ``load_density_data`` as ``need_3d``.
        """
        train_dir = '../birads_dataset/train-sq-512/'
        test_dir = '../birads_dataset/dev-sq-512/'

        self.train_data, self.train_labels = load_density_data(
            train_dir, need_3d=data_3d)
        print(self.train_labels[0:])
        self.test_data, self.test_labels = load_density_data(
            test_dir, need_3d=data_3d)

        # Image geometry; the channel count is 1 whether or not data_3d is set.
        self.h = 512
        self.w = 512
        self.c = 1

        self._idx = 1

        self.classes_map = CLASSES
        self.n_classes = len(set(self.classes_map.values()))
        self.should_enforce_class_balance = True

        # Report label statistics, and once more after balancing so the
        # effect of the balancing step shows up in the output.
        self.print_all_label_statistics(logfile)
        if self.should_enforce_class_balance:
            print("Enforcing Class balance")
            self.enforce_class_balance()
            self.print_all_label_statistics(logfile)

        # Sample counts are taken after the (optional) balancing step.
        self.trainnum = self.train_data.shape[0]
        self.testnum = self.test_data.shape[0]

        self.augment_training_data = False
        # NOTE(review): the range semantics of the scalar settings below are
        # defined by ImageAugmenter.
        augmenter_settings = dict(
            hflip=True,
            vflip=True,
            scale_to_percent=1.2,
            scale_axis_equally=False,  # x and y may be scaled differently
            rotation_deg=10,
            shear_deg=5,
            translation_x_px=20,
            translation_y_px=20,
        )
        self.augmenter = ImageAugmenter(self.w, self.h, **augmenter_settings)
def augment_image(image):
    """Return a randomly augmented uint8 version of ``image``.

    Steps: invert the image (``255 - image``), resize it to the module-level
    ``img_size`` (unpacked as width, height), augment it as a one-element
    batch, multiply the result by 255, invert it back and cast to
    ``np.uint8``.
    """
    negative = 255 - image
    out_width, out_height = img_size
    negative = cv2.resize(negative, (out_width, out_height))

    # NOTE(review): how the scalar/tuple settings map to random ranges is
    # defined by ImageAugmenter. The blur and noise settings are passed as 0.
    augmenter = ImageAugmenter(
        out_width,
        out_height,
        hflip=False,
        vflip=False,
        scale_to_percent=(0.9, 1.05),
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=2,
        shear_deg=5,
        translation_x_px=8,
        translation_y_px=2,
        blur_radius=0,
        noise_variance=0,
        motion_blur_radius=0,
        motion_blur_strength=0,
    )

    batch_of_one = np.array([negative], dtype=np.uint8)
    result = augmenter.augment_batch(batch_of_one)[0]
    result *= 255
    result = 255 - result
    return result.astype(np.uint8)
示例#5
0
    def test_non_square_images(self):
        """Check a fixed y-translation on an image whose height and width
        differ (11 rows by 4 columns)."""
        y_size, x_size = 11, 4
        image_before = np.zeros((y_size, x_size), dtype=np.uint8)
        image_target = np.zeros((y_size, x_size), dtype=np.float32)

        # Draw a white horizontal line one row below the vertical centre.
        # The augmenter is configured with translation_y_px=(-2, -2), so the
        # expected output has the line two rows higher.
        y_line_pos = y_size // 2 + 1
        image_before[y_line_pos, :] = 255
        image_target[y_line_pos - 2, :] = 1.0

        augmenter = ImageAugmenter(x_size, y_size, translation_y_px=(-2,-2))
        nb_augment = 100
        images = np.resize([image_before], (nb_augment, y_size, x_size))
        images_augmented = augmenter.augment_batch(images)

        # Every augmented image must match the target.
        nb_similar = sum(
            1 for image_after in images_augmented
            if np.allclose(image_after, image_target)
        )
        self.assertEqual(nb_augment, nb_similar)
示例#6
0
def main():
    """Plot example augmentations for Lena and an image loaded from a file.

    Three cases are plotted: the Lena sample image, ``chameleon.png`` loaded
    from disk, and the same file with its channel axis moved to the front.
    """
    # The same augmentation settings are used for all three demonstrations.
    aug_params = dict(hflip=True, vflip=True,
                      scale_to_percent=1.3, scale_axis_equally=False,
                      rotation_deg=25, shear_deg=10,
                      translation_x_px=5, translation_y_px=5)

    # try on a lena image
    image = data.lena()
    # ImageAugmenter takes (width, height); shape is (height, width, ...).
    augmenter = ImageAugmenter(image.shape[1], image.shape[0], **aug_params)

    augmenter.plot_image(image, 100)

    # check loading of images from file and augmenting them
    image = misc.imread("chameleon.png")
    augmenter = ImageAugmenter(image.shape[1], image.shape[0], **aug_params)

    augmenter.plot_image(image, 50)

    # move the channel from index 2 (3rd position) to index 0 (1st position)
    # so (y, x, rgb) becomes (rgb, y, x)
    # try if it still works
    image = np.rollaxis(image, 2, 0)
    augmenter = ImageAugmenter(image.shape[2], image.shape[1],
                               channel_is_first_axis=True, **aug_params)
    augmenter.plot_image(image, 50)
示例#7
0
 def augmentor(self, img):
     """Build an ImageAugmenter sized to ``img`` and return
     ``augment_ordered(img)``.

     The image height and width come from ``img.shape[0]`` and
     ``img.shape[1]``; all transformation settings are read from ``self``.
     """
     height_px, width_px = img.shape[0], img.shape[1]
     settings = {
         "scale_to_percent": self.scale_to_percent,
         "rotation_deg": self.rotation_deg,
         "shear_deg": self.shear_deg,
         "translation_x_px": self.translation_x_px,
         "translation_y_px": self.translation_y_px,
         "transform_channels_equally": self.transform_channels_equally,
     }
     augmenter = ImageAugmenter(img_height_px=height_px,
                                img_width_px=width_px,
                                **settings)
     return augmenter.augment_ordered(img)
示例#8
0
 def augmentor(self, img):
     """Return ``augment_ordered(img)`` from an ImageAugmenter matching ``img``.

     The augmenter is created per call with the height/width of ``img``
     (``shape[0]`` / ``shape[1]``) and the transformation settings stored on
     ``self``.
     """
     img_height = img.shape[0]
     img_width = img.shape[1]
     augmenter = ImageAugmenter(
         img_height_px=img_height,
         img_width_px=img_width,
         scale_to_percent=self.scale_to_percent,
         rotation_deg=self.rotation_deg,
         shear_deg=self.shear_deg,
         translation_x_px=self.translation_x_px,
         translation_y_px=self.translation_y_px,
         transform_channels_equally=self.transform_channels_equally,
     )
     result = augmenter.augment_ordered(img)
     return result
示例#9
0
    def test_translation_y(self):
        """Check that a fixed -1px y-translation moves the bottom row to the top."""
        bottom_row_lit = [[  0,   0],
                          [255, 255]]
        image_target = [[1.0, 1.0],
                        [  0,   0]]
        images = np.array([bottom_row_lit]).astype(np.uint8)

        # (-1, -1) pins the translation on the y-axis to exactly -1px.
        augmenter = ImageAugmenter(2, 2, translation_y_px=(-1,-1))

        # The translation is fixed, so every run must match the target.
        for _ in range(100):
            augmented_batch = augmenter.augment_batch(images)
            image_after = augmented_batch[0]
            self.assertTrue(np.allclose(image_target, image_after))
示例#10
0
    def test_rotation(self):
        """Check that a fixed 90 degree rotation turns a vertical line into a
        horizontal one."""
        vertical_line = [[0, 255, 0],
                         [0, 255, 0],
                         [0, 255, 0]]
        image_target = [[  0,   0,   0],
                        [1.0, 1.0, 1.0],
                        [  0,   0,   0]]
        images = np.array([vertical_line]).astype(np.uint8)

        # (90, 90) pins the rotation to exactly 90 degrees.
        augmenter = ImageAugmenter(3, 3, rotation_deg=(90, 90))

        augmented_batch = augmenter.augment_batch(images)
        image_after = augmented_batch[0]
        self.assertTrue(np.allclose(image_target, image_after))
示例#11
0
    def test_translation_x(self):
        """Check that a fixed +1px x-translation moves the left column to the right."""
        left_column_lit = [[255,   0],
                           [255,   0]]
        image_target = [[0, 1.0],
                        [0, 1.0]]
        images = np.array([left_column_lit]).astype(np.uint8)

        # (1, 1) pins the translation on the x-axis to exactly +1px.
        augmenter = ImageAugmenter(2, 2, translation_x_px=(1,1))

        # The translation is fixed, so every run must match the target.
        for _ in range(100):
            augmented_batch = augmenter.augment_batch(images)
            image_after = augmented_batch[0]
            self.assertTrue(np.allclose(image_target, image_after))
示例#12
0
 def test_no_information_leaking(self):
     """Tests whether the images provided to augment_batch() are changed
     in place instead of only being returned in changed form (leaking
     information / hidden side effects)."""
     image_before = [[255,   0, 255,   0, 255],
                     [  0, 255,   0, 255,   0],
                     [255, 255, 255, 255, 255],
                     [  0, 255,   0, 255,   0],
                     [255,   0, 255,   0, 255]]
     image_before = np.array(image_before, dtype=np.uint8)
     image_before_copy = np.copy(image_before)
     nb_augment = 100
     images = np.resize([image_before], (nb_augment, 5, 5))
     # np.resize returns a new array, so `images` (not `image_before`) is
     # the object actually handed to the augmenter. Snapshot it to detect
     # in-place modification.
     images_copy = np.copy(images)
     augmenter = ImageAugmenter(5, 5,
                                hflip=True, vflip=True,
                                scale_to_percent=1.5,
                                rotation_deg=25, shear_deg=10,
                                translation_x_px=5, translation_y_px=5)
     augmenter.augment_batch(images)
     # The input batch must be untouched ...
     self.assertTrue(np.array_equal(images, images_copy))
     # ... and so must the source image it was built from.
     self.assertTrue(np.array_equal(image_before, image_before_copy))
示例#13
0
 def test_no_blacks(self):
     """Check that none of 100 random augmentations of the camera sample
     image comes out completely black (all zeros)."""
     source = data.camera()
     y_size, x_size = source.shape
     augmenter = ImageAugmenter(
         x_size,
         y_size,
         scale_to_percent=1.5,
         scale_axis_equally=False,
         rotation_deg=90,
         shear_deg=20,
         translation_x_px=10,
         translation_y_px=10,
     )
     image_black = np.zeros(source.shape, dtype=np.float32)

     nb_augment = 100
     batch = np.resize([source], (nb_augment, y_size, x_size))
     augmented_batch = augmenter.augment_batch(batch)

     # Count outputs that are indistinguishable from an all-black image.
     nb_black = sum(
         1 for image_after in augmented_batch
         if np.allclose(image_after, image_black)
     )
     self.assertEqual(nb_black, 0)
示例#14
0
    def test_shear(self):
        """Coarse shear check: with ``shear_deg=50``, more than 90% of 1000
        augmented images must differ from the unsheared reference."""
        vertical_line = [[0, 255, 0],
                         [0, 255, 0],
                         [0, 255, 0]]
        # Reference: the same vertical line in the 0..1 value range.
        image_target = [[0, 1.0, 0],
                        [0, 1.0, 0],
                        [0, 1.0, 0]]
        images = np.array([vertical_line]).astype(np.uint8)
        augmenter = ImageAugmenter(3, 3, shear_deg=50)

        nb_augment = 1000
        nb_different = 0
        for _ in range(nb_augment):
            image_after = augmenter.augment_batch(images)[0]
            unchanged = np.allclose(image_target, image_after)
            if not unchanged:
                nb_different += 1

        # The large majority must have been visibly changed by the shear.
        self.assertTrue(nb_different > nb_augment*0.9)
示例#15
0
    def test_rotation_invariant(self):
        """Test random rotation (``rotation_deg=180``) on a rotation
        invariant image: a single bright pixel in the centre of a 3x3 grid."""
        image_before = [[0,   0, 0],
                        [0, 255, 0],
                        [0,   0, 0]]
        image_target = [[0,   0, 0],
                        [0, 1.0, 0],
                        [0,   0, 0]]
        images = np.array([image_before]).astype(np.uint8)

        # random rotation with rotation_deg=180
        augmenter = ImageAugmenter(3, 3, rotation_deg=180)

        # all must be similar to target
        nb_similar = 0
        for _ in range(100):
            image_after = augmenter.augment_batch(images)[0]
            # some tolerance here - interpolation problems can let the image
            # change a bit, even though it should be invariant to rotations
            if np.allclose(image_target, image_after, atol=0.1):
                nb_similar += 1
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(nb_similar, 100)
示例#16
0
    def test_scaling(self):
        """Rough zoom-in check (scaling > 1.0 only).

        Interpolation makes the exact result of scaling a tiny synthetic
        image hard to predict, so only the total brightness is compared.
        """
        size_x = 4
        size_y = 4

        # 4x4 black image whose inner 2x2 block (rows/cols 1..2) is white.
        image_before = np.zeros((size_y, size_x))
        image_before[1:-1, 1:-1] = 255

        images = image_before[np.newaxis].astype(np.uint8)

        # Both scale bounds are 1.99, i.e. roughly a 200% zoom in.
        augmenter = ImageAugmenter(size_x, size_y,
                                   scale_to_percent=(1.99, 1.99),
                                   scale_axis_equally=True)

        image_after = augmenter.augment_batch(images)[0]

        # Zooming in should enlarge the bright block, so the summed
        # brightness must exceed that of the input (divided by 255 to bring
        # it onto the same scale as the augmented output).
        brightness_before = np.sum(image_before)/255
        brightness_after = np.sum(image_after)
        self.assertTrue(brightness_after > brightness_before)
示例#17
0
    def test_single_channel(self):
        """Check a fixed +1px x-translation on images with one explicit
        channel, first with the channel as last axis, then as first axis."""
        # --- channel is the last axis: shape (y, x, 1) ---
        # Left column lit in the input, right column lit in the target.
        last_axis_before = np.zeros((2, 2, 1), dtype=np.uint8)
        last_axis_before[:, 0, 0] = 255

        last_axis_target = np.zeros((2, 2, 1), dtype=np.float32)
        last_axis_target[:, 1, 0] = 1.0

        images = np.array([last_axis_before]).astype(np.uint8)
        augmenter = ImageAugmenter(2, 2, translation_x_px=(1,1))

        # The translation is fixed, so every run must match the target.
        for _ in range(100):
            image_after = augmenter.augment_batch(images)[0]
            self.assertTrue(np.allclose(last_axis_target, image_after))

        # --- channel is the first axis: shape (1, y, x) ---
        first_axis_before = np.zeros((1, 2, 2), dtype=np.uint8)
        first_axis_before[0] = [[255, 0],
                                [255, 0]]

        first_axis_target = np.zeros((1, 2, 2), dtype=np.float32)
        first_axis_target[0] = [[0, 1.0],
                                [0, 1.0]]

        images = np.array([first_axis_before]).astype(np.uint8)
        augmenter = ImageAugmenter(2, 2, translation_x_px=(1,1),
                                   channel_is_first_axis=True)

        # The translation is fixed, so every run must match the target.
        for _ in range(100):
            image_after = augmenter.augment_batch(images)[0]
            self.assertTrue(np.allclose(first_axis_target, image_after))
示例#18
0
def _crop_to_aspect_ratio(image, target_ratio, epsilon):
    """Drop border rows/columns from ``image`` until width/height is within
    ``epsilon`` of ``target_ratio``, and return the cropped view.

    When the image is too tall, three of every four removed rows come from
    the top and one from the bottom. When it is too wide, columns are
    removed alternately from the left and the right.
    """
    height, width = image.shape[0], image.shape[1]
    # float() keeps this a true division under Python 2 as well.
    wh_ratio = float(width) / height
    removed = 0
    while not (wh_ratio - epsilon <= target_ratio <= wh_ratio + epsilon):
        if wh_ratio < target_ratio:
            # height value is too high
            # remove more from top than from bottom, because we have sky images and
            # hence much similar content at top and only a few rows of pixels with
            # different content at the bottom
            if removed % 4 != 0:
                # remove one row at the top
                image = image[1:, :, ...]
            else:
                # remove one row at the bottom
                image = image[:-1, :, ...]
        else:
            # width value is too high
            if removed % 2 == 0:
                # remove one column at the left
                image = image[:, 1:, ...]
            else:
                # remove one column at the right
                image = image[:, :-1, ...]

        height, width = image.shape[0], image.shape[1]
        wh_ratio = float(width) / height
        removed += 1
    return image


def main():
    """Iterates over the images in each directory, shrinks and augments each one.

    For every (read-directory, write-directory) pair in ``DIRS`` each image
    is padded, augmented ``AUGMENTATIONS`` times, un-padded, cropped to
    ``RATIO_WIDTH_TO_HEIGHT``, resized to ``SCALE_HEIGHT`` x ``SCALE_WIDTH``
    and saved under a name carrying the augmentation index. Images that
    raise ``IOError`` are counted and skipped.
    """
    nb_processed = 0
    nb_errors = 0
    nb_total = len(get_all_filepaths([download_dir for download_dir, _ in DIRS]))

    # iterate over directories (read-directory and save-to-directory)
    for download_dir, write_to_dir in DIRS:
        print("Reading from '%s'" % (download_dir,))
        print("Writing to '%s'" % (write_to_dir,))

        # create directory if it doesnt exist
        if not os.path.exists(write_to_dir):
            os.makedirs(write_to_dir)

        # load filepaths of images in directory
        fps_img = get_all_filepaths([download_dir])

        # iterate over each image
        for fp_img in fps_img:
            # 100.0 keeps the percentage fractional under Python 2 as well.
            print("Image %d of %d (%.2f%%) (%s)" \
                  % (nb_processed+1, nb_total, 100.0*(nb_processed+1)/nb_total, fp_img))
            try:
                filename = fp_img[fp_img.rfind("/")+1:]
                # Split once so the augmentation index can be inserted before
                # the extension, whatever the image format is.
                name_root, name_ext = os.path.splitext(filename)

                # dont use misc.imload, fails for grayscale images
                image = ndimage.imread(fp_img, mode="RGB")

                # Size of the unpadded source image. These must stay fixed
                # for all augmentations of this image.
                height = image.shape[0]
                width = image.shape[1]

                # add padding at the borders of the image
                # then augment image
                img_padded = np.pad(image, ((PADDING, PADDING), (PADDING, PADDING), (0, 0)),
                                    mode="median")
                batch = np.zeros((AUGMENTATIONS, height+(2*PADDING), width+(2*PADDING), 3),
                                 dtype=np.uint8)
                batch[:] = img_padded

                ia = ImageAugmenter(width+(2*PADDING), height+(2*PADDING),
                                    channel_is_first_axis=False,
                                    hflip=True, vflip=False,
                                    scale_to_percent=(1.05, 1.2), scale_axis_equally=True,
                                    rotation_deg=5, shear_deg=1,
                                    translation_x_px=15, translation_y_px=15)
                batch = ia.augment_batch(batch)

                for i in range(0, AUGMENTATIONS):
                    # remove padding (explicit end index, so PADDING == 0
                    # does not produce an empty slice)
                    image_aug = batch[i, PADDING:PADDING+height, PADDING:PADDING+width, ...]

                    # shrink the image to desired height/width sizes
                    # first delete rows/columns until aspect ratio matches desired aspect ratio
                    # then resize
                    # doing this after the augmentation should decrease the likelihood of
                    # ending with badly looking black areas at the borders of the image
                    # The crop starts from this augmentation's own size, so
                    # every augmentation is cropped, not only the first.
                    image_aug = _crop_to_aspect_ratio(image_aug, RATIO_WIDTH_TO_HEIGHT, EPSILON)

                    image_resized = misc.imresize(image_aug, (SCALE_HEIGHT, SCALE_WIDTH))

                    # save augmented image
                    filename_aug = "%s__%d%s" % (name_root, i, name_ext)
                    misc.imsave(os.path.join(write_to_dir, filename_aug), image_resized)
            except IOError as exc:
                # sometimes downloaded images cannot be read by imread()
                # this should catch these cases
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                nb_errors += 1

            nb_processed += 1

        print("Processed %d images" % (nb_processed,))
        print("Encountered %d errors" % (nb_errors,))
        print("Finished.")
示例#19
0
def main():
    """Measure time required to generate augmentations matrices and to apply
    them.

    Runs ``run_tests`` on batches of the Lena image at full size and at
    64x64 (each with and without pregenerated matrices), then times one
    million 32x32 augmentations through ``ImageAugmenter`` and through a
    single directly applied matrix.
    """
    batch_size = 64
    nb_runs = 20

    # Affine settings shared by every benchmark below.
    affine_params = dict(scale_to_percent=1.3,
                         scale_axis_equally=False,
                         rotation_deg=25,
                         shear_deg=10,
                         translation_x_px=5,
                         translation_y_px=5)

    # Disabled: measure time required to generate 100k augmentation matrices
    #
    # print("Generating 100 times 1000 augmentation matrices of size 64x64...")
    # start = time.time()
    # for _ in range(100):
    #     create_aug_matrices(1000, 64, 64,
    #                         scale_to_percent=1.5, scale_axis_equally=False,
    #                         rotation_deg=20, shear_deg=20,
    #                         translation_x_px=5, translation_y_px=5)
    # print("Done in %.8f" % (time.time() - start,))

    # Test Performance on 64 images of size 512x512 pixels
    image = data.lena()
    images = np.resize(image, (batch_size,) + tuple(image.shape))
    # ImageAugmenter takes (width, height); shape is (height, width, channels).
    augmenter = ImageAugmenter(image.shape[1],
                               image.shape[0],
                               hflip=True,
                               vflip=True,
                               **affine_params)
    print("Running tests on %d images of shape %s" %
          (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)
    print("")

    print("Running tests on %d images of shape %s" %
          (batch_size, str(image.shape)))
    print("(With 1000 pregenerated matrices)")
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Test Performance on 64 images of size 64x64 pixels
    image = data.lena()
    image = misc.imresize(image, (64, 64))
    images = np.resize(image, (batch_size,) + tuple(image.shape))
    augmenter = ImageAugmenter(image.shape[1],
                               image.shape[0],
                               hflip=True,
                               vflip=True,
                               **affine_params)
    print("Running tests on %d images of shape %s" %
          (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)

    print("Running tests on %d images of shape %s" %
          (batch_size, str(image.shape)))
    print("(With 1000 pregenerated matrices)")
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    print("Augmenting 1000 batches of 1000 lena images (1 million total)" \
          ", each of size 32x32...")
    image = data.lena()
    image = misc.imresize(image, (32, 32))
    batch_size = 1000
    images = np.resize(image, (batch_size,) + tuple(image.shape))
    augmenter = ImageAugmenter(image.shape[1],
                               image.shape[0],
                               hflip=True,
                               vflip=True,
                               **affine_params)
    augmenter.pregenerate_matrices(1000)

    start = time.time()
    for _ in range(1000):
        augmenter.augment_batch(images)
    print("Done in %.8fs" % (time.time() - start, ))
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    # but using only one matrix without the class (no library overhead from
    # ImageAugmenter)
    # Notice that this does not include horizontal and vertical flipping,
    # which is done via numpy in the ImageAugmenter class.
    print("Augmenting 1000 batches of 1000 lena images (1 million total)" \
          ", each of size 32x32, using one matrix directly (no ImageAugmenter " \
          "class)...")
    matrices = create_aug_matrices(1,
                                   image.shape[1],
                                   image.shape[0],
                                   **affine_params)
    matrix = matrices[0]

    start = time.time()
    for _ in range(1000):
        # The warped result is discarded on purpose; only the elapsed time
        # matters here.
        for single_image in images:
            tf.warp(single_image, matrix)
    print("Done in %.8fs" % (time.time() - start, ))
示例#20
0
from scipy import misc
import numpy as np

# Collect the regular files in the generated-positives folder below the
# current working directory.
directory = os.getcwd() + "/training/positive_generated/"
files = [entry for entry in listdir(directory) if isfile(join(directory, entry))]

for file in files:
    # Skip names that mention neither "jpg" nor "png".
    if not ("jpg" in file or "png" in file):
        continue
    image = misc.imread("./training/positive_generated/" + file)
    height, width = image.shape[0], image.shape[1]

    # One augmenter per file, sized to that file's own dimensions.
    # NOTE(review): the range semantics of the scalar settings are defined
    # by ImageAugmenter.
    augmenter = ImageAugmenter(
        width,
        height,
        hflip=True,
        vflip=True,
        scale_to_percent=1.3,
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=25,
        shear_deg=10,
        translation_x_px=5,
        translation_y_px=5,
    )
    fig = augmenter.plot_image(image, name=file, nb_repeat=20)
示例#21
0
def _make_benchmark_augmenter(image):
    """Return the ImageAugmenter configuration shared by all benchmarks.

    The augmenter is sized from ``image.shape``; the width (``shape[1]``) is
    passed first and the height (``shape[0]``) second, which is the argument
    order used by the other ImageAugmenter call sites in this file.
    """
    return ImageAugmenter(image.shape[1], image.shape[0],
                          hflip=True, vflip=True,
                          scale_to_percent=1.3, scale_axis_equally=False,
                          rotation_deg=25, shear_deg=10,
                          translation_x_px=5, translation_y_px=5)


def main():
    """Measure time required to generate augmentations matrices and to apply
    them.

    Runs ``run_tests`` on batches of the lena image at full size and at
    64x64 (each with and without pregenerated matrices), then times the
    augmentation of 1000 batches of 1000 32x32 images, once through
    ImageAugmenter and once by warping with a single matrix directly.
    Results are printed; nothing is returned.
    """
    batch_size = 64
    nb_runs = 20

    # Disabled benchmark: time required to generate 100k augmentation matrices
    # print("Generating 100 times 1000 augmentation matrices of size 64x64...")
    # start = time.time()
    # for _ in range(100):
    #     create_aug_matrices(1000, 64, 64,
    #                         scale_to_percent=1.5, scale_axis_equally=False,
    #                         rotation_deg=20, shear_deg=20,
    #                         translation_x_px=5, translation_y_px=5)
    # print("Done in %.8f" % (time.time() - start,))

    # Test Performance on 64 images of size 512x512 pixels
    image = data.lena()
    images = np.resize(image, (batch_size, image.shape[0], image.shape[1], image.shape[2]))
    augmenter = _make_benchmark_augmenter(image)
    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)
    print("")

    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    print("(With 1000 pregenerated matrices)")
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Test Performance on 64 images of size 64x64 pixels
    image = misc.imresize(data.lena(), (64, 64))
    images = np.resize(image, (batch_size, image.shape[0], image.shape[1], image.shape[2]))
    augmenter = _make_benchmark_augmenter(image)
    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)

    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    print("(With 1000 pregenerated matrices)")
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    print("Augmenting 1000 batches of 1000 lena images (1 million total)"
          ", each of size 32x32...")
    image = misc.imresize(data.lena(), (32, 32))
    batch_size = 1000
    images = np.resize(image, (batch_size, image.shape[0], image.shape[1], image.shape[2]))
    augmenter = _make_benchmark_augmenter(image)
    augmenter.pregenerate_matrices(1000)

    start = time.time()
    for _ in range(1000):
        augmenter.augment_batch(images)
    print("Done in %.8fs" % (time.time() - start,))
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    # but using only one matrix without the class (no library overhead from
    # ImageAugmenter)
    # Notice that this does not include horizontal and vertical flipping,
    # which is done via numpy in the ImageAugmenter class.
    print("Augmenting 1000 batches of 1000 lena images (1 million total)"
          ", each of size 32x32, using one matrix directly (no ImageAugmenter "
          "class)...")
    matrices = create_aug_matrices(1, image.shape[1], image.shape[0],
                                   scale_to_percent=1.3, scale_axis_equally=False,
                                   rotation_deg=25, shear_deg=10,
                                   translation_x_px=5, translation_y_px=5)
    matrix = matrices[0]

    start = time.time()
    for _ in range(1000):
        # The warped result is discarded on purpose; only the time matters.
        for single_image in images:
            tf.warp(single_image, matrix)
    print("Done in %.8fs" % (time.time() - start,))
示例#22
0
class DensityLoader(object):
    """Load the density dataset and serve it in batches.

    Training and test splits are read with ``load_density_data``. Labels are
    treated as one-hot rows (the class of a row is its ``argmax``). The test
    split is cut down so that every class has the same number of examples.
    """

    def __init__(self, logfile, data_3d=True):
        """Load both splits, log their class balance and build the augmenter.

        Args:
            logfile: Path of a text file to which class-balance statistics
                are appended.
            data_3d: Forwarded to ``load_density_data`` as ``need_3d``.
        """
        self.train_data, self.train_labels = load_density_data(
            '../birads_dataset/train-sq-512/', need_3d=data_3d)
        print(self.train_labels[0:])
        self.test_data, self.test_labels = load_density_data(
            '../birads_dataset/dev-sq-512/', need_3d=data_3d)

        self.h = 512
        self.w = 512
        # Single channel regardless of data_3d (both branches of the former
        # if/else assigned 1).
        self.c = 1

        # Position of the next training example served by next_batch. Starts
        # at 0 so that the first example is not skipped in the first epoch.
        self._idx = 0

        self.classes_map = CLASSES
        self.n_classes = len(set(self.classes_map.values()))
        self.should_enforce_class_balance = True

        self.print_all_label_statistics(logfile)
        if self.should_enforce_class_balance:
            print("Enforcing Class balance")
            self.enforce_class_balance()
            self.print_all_label_statistics(logfile)

        self.trainnum = self.train_data.shape[0]
        self.testnum = self.test_data.shape[0]

        self.augment_training_data = False
        # NOTE(review): the ranges in the comments below assume each value is
        # a symmetric bound, as the ImageAugmenter comments elsewhere in this
        # file describe it - confirm against the library.
        self.augmenter = ImageAugmenter(
            self.w,
            self.h,  # width and height of the image (must be the same for all images in the batch)
            hflip=True,  # flip horizontally with 50% probability
            vflip=True,  # flip vertically with 50% probability
            scale_to_percent=1.2,  # presumably scales to 80%-120% of the original size
            scale_axis_equally=False,  # allow the axis to be scaled unequally (e.g. x more than y)
            rotation_deg=10,  # rotate between -10 and +10 degrees
            shear_deg=5,  # shear between -5 and +5 degrees
            translation_x_px=20,  # translate between -20 and +20 px on the x-axis
            translation_y_px=20  # translate between -20 and +20 px on the y-axis
        )

    def print_label_statistics(self, labels, logfile, labels_label):
        """Count examples per class, print the counts and append them to a file.

        Args:
            labels: Iterable of label rows; the class of a row is its argmax.
            logfile: Path of the file the summary line is appended to.
            labels_label: Name of the split, used in the summary line.

        Returns:
            Dict mapping every class id in ``self.classes_map`` to its count.
        """
        class_count = {key: 0 for key in set(self.classes_map.values())}
        for label in labels:
            class_count[np.argmax(label)] += 1
        print("Class Balance for {}: {}. Total #: {}".format(
            labels_label, class_count, len(labels)))
        # The context manager closes the log even if formatting/writing fails.
        with open(logfile, 'a+') as f:
            f.write("Class Balance for {}: {}. Total #: {}\n ".format(
                labels_label, class_count, len(labels)))
        return class_count

    def print_all_label_statistics(self, logfile):
        """Report the class balance of the training and the test labels."""
        self.print_label_statistics(self.train_labels, logfile, "Train")
        self.print_label_statistics(self.test_labels, logfile, "Test")

    def enforce_class_balance(self):
        """Balance the classes of the test split (training data is untouched)."""
        self.test_data, self.test_labels = self.enforce_class_balance_helper(
            self.test_data, self.test_labels)

    def enforce_class_balance_helper(self, data, labels):
        """Keep the same number of examples for every class.

        For each class in ``self.classes_map`` the first ``m`` examples (in
        their original order) are kept, where ``m`` is the size of the
        smallest class.

        Args:
            data: Indexable collection of examples, aligned with ``labels``.
            labels: 2-D collection of one-hot label rows.

        Returns:
            Tuple ``(data_new, labels_new)`` of numpy arrays, grouped by class.
        """
        # Class of every row; using argmax here matches the way the classes
        # are counted and avoids element-wise comparison against a one-hot
        # target, which also matched rows of other classes.
        row_classes = np.argmax(np.asarray(labels), axis=1)

        class_count = {key: 0 for key in set(self.classes_map.values())}
        for row_class in row_classes:
            class_count[row_class] += 1
        min_class_count = min(class_count.values())

        image_data_new = []
        labels_new = []
        for cl in class_count:
            indices = np.where(row_classes == cl)[0]
            for index in indices[:min_class_count]:
                image_data_new.append(data[index][...])
                labels_new.append(labels[index][...])

        return np.array(image_data_new), np.array(labels_new)

    def augment_images(self, images):
        """Run a batch through the augmenter.

        The batch is shifted by +0.5 and scaled by 255 before being cast to
        uint8, and 0.5 is subtracted from the augmenter output.
        NOTE(review): this presumably expects inputs in [-0.5, 0.5] and an
        augmenter output in [0, 1] - confirm.
        """
        augmented_images = ((images + 0.5) * 255.0).astype('uint8')
        augmented_images = self.augmenter.augment_batch(augmented_images) - 0.5
        return augmented_images

    def next_batch(self, batch_size, data_group='train'):
        """Return the next ``batch_size`` training examples.

        Examples are always taken from the training split; ``data_group``
        only decides whether augmentation may be applied. When the end of the
        training data is reached, data and labels are reshuffled together and
        reading restarts at the beginning.

        Returns:
            Tuple ``(images_batch, labels_batch)`` with shapes
            ``(batch_size, h, w, c)`` and ``(batch_size, n_classes)``.
        """
        images_batch = np.zeros((batch_size, self.h, self.w, self.c))
        labels_batch = np.zeros((batch_size, self.n_classes))
        for i in range(batch_size):
            images_batch[i, ...] = self.train_data[self._idx].reshape(
                (self.h, self.w, self.c))
            labels_batch[i, ...] = self.train_labels[self._idx]

            self._idx += 1
            if self._idx == self.trainnum:
                self._idx = 0
                # Shuffle both arrays in one call to keep them aligned.
                self.train_data, self.train_labels = shuffle(self.train_data,
                                                             self.train_labels,
                                                             random_state=20)

        if data_group == 'train' and self.augment_training_data:
            images_batch = self.augment_images(images_batch)

        return images_batch, labels_batch

    def load_test(self):
        """Return the test images reshaped to ``(-1, h, w, c)`` and their labels."""
        return self.test_data.reshape(
            (-1, self.h, self.w, self.c)), self.test_labels
     continue
 bbox = [x1, y1, x2, y2]
 # resize marker
 bbox_shape = [x2 - x1, y2 - y1]
 tmp_marker = cv2.resize(marker, (bbox_shape[0], bbox_shape[1]))
 # Image augmentation
 height = tmp_marker.shape[0]
 width = tmp_marker.shape[1]
 augmenter = ImageAugmenter(
     width,
     height,  # width and height of the image (must be the same for all images in the batch)
     hflip=True,  # flip horizontally with 50% probability
     vflip=True,  # flip vertically with 50% probability
     scale_to_percent=
     1.5,  # scale the image to 70%-130% of its original size
     scale_axis_equally=
     False,  # allow the axis to be scaled unequally (e.g. x more than y)
     rotation_deg=45,  # rotate between -25 and +25 degrees
     shear_deg=20,  # shear between -10 and +10 degrees
     translation_x_px=
     0,  # translate between -5 and +5 px on the x-axis
     translation_y_px=
     0  # translate between -5 and +5 px on the y-axis
 )
 # augment a batch containing only this image
 # the input array must have dtype uint8 (ie. values 0-255), as is the case for scipy's imread()
 # the output array will have dtype float32 (0.0-1.0) and can be fed directly into a neural network
 tmp_marker = augmenter.augment_batch(
     np.array([tmp_marker], dtype=np.uint8))
 # Convert tmp_marker back to uint8 format
 tmp_marker = tmp_marker[0] * 255
 tmp_marker = tmp_marker.astype(np.uint8)
 """Returns X and Y examples to train/test on.
 Args:
     count: Maximum number of different images to return (this will be increased by the
            augmentation number, i.e. count=1 with augmentations=10 will return 10+1 examples).
     start_at: Start index of the first example to return.
     augmentations: How often each image will be augmented.
 Returns:
     (X, Y)
     with X being a tensor of images
     and Y being in array of rows [center x, center y, height/2, width/2] of each face rectangle.
 """
 # low strength augmentation because we will not change the coordinates, so the image
 # should be kept mostly the same
 ia = ImageAugmenter(MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH,
                     channel_is_first_axis=False,
                     hflip=False, vflip=False,
                     scale_to_percent=(0.95, 1.05), scale_axis_equally=True,
                     rotation_deg=5, shear_deg=2,
                     translation_x_px=1, translation_y_px=1)
 
 images_filepaths = get_all_filepaths(DIRS)
 images = []
 labels = []
 for image_filepath in images_filepaths[start_at:start_at+count]:
     coords_filepath = "%s.cat" % (image_filepath,)
     image, (center_y, center_x), (scale_y, scale_x) = get_image_with_rectangle(image_filepath,
                                                                                coords_filepath)
     # get_image_with_rectangle returns None if the coordinates file was not found,
     # which is the case for one image in 10k cats dataset
     if image is not None:
         images.append(image / 255) # project pixel values to 0-1
         y = [center_y, center_x, scale_y, scale_x]
示例#25
0
    def test_two_channels(self):
        """Tests augmentation of images with two channels, with the channel
        being either the first or the last axis of each image.

        Uses a y-translation range of (0, 1): the target image is the input
        moved down by one row, and 40%-60% of the augmented images are
        expected to match it.
        """
        # Channel planes, each indexed as [y (row)][x (column)].
        # 1st channel: top row white, bottom row black
        # 2nd channel: top right corner white, everything else black
        planes_before = np.array([[[255, 255],
                                   [  0,   0]],
                                  [[  0, 255],
                                   [  0,   0]]], dtype=np.uint8)
        # 1st channel: bottom row white, top row black
        # 2nd channel: bottom right corner white, everything else black
        planes_target = np.array([[[  0,   0],
                                   [1.0, 1.0]],
                                  [[  0,   0],
                                   [  0, 1.0]]], dtype=np.float32)

        nb_augment = 1000

        # first pass: channel is the FIRST axis, second pass: the LAST axis
        for channel_first in (True, False):
            augmenter = ImageAugmenter(2, 2, translation_y_px=(0,1),
                                       channel_is_first_axis=channel_first)

            if channel_first:
                image_before = planes_before
                image_target = planes_target
            else:
                # move the channel axis to the end: [y][x][channel]
                image_before = planes_before.transpose(1, 2, 0)
                image_target = planes_target.transpose(1, 2, 0)

            image = np.array([image_before]).astype(np.uint8)
            images = np.resize(image, (nb_augment, 2, 2, 2))
            images_augmented = augmenter.augment_batch(images)

            nb_similar = sum(1 for image_after in images_augmented
                             if np.allclose(image_target, image_after))
            self.assertTrue(nb_augment*0.4 < nb_similar < nb_augment*0.6)
示例#26
0
    def test_vertical_flipping(self):
        """Tests vertical flipping of images (mirror on x-axis).

        Covers vflip given as False, as True, as a probability and for an
        image with two channels (channel as first axis).
        """
        def count_flipped(augmenter, batch, expected):
            # number of augmented images that equal the flipped target
            return sum(1 for image_after in augmenter.augment_batch(batch)
                       if np.allclose(image_after, expected))

        nb_augment = 1000
        image_before = np.array([[255,   0,   0],
                                 [  0, 255, 255],
                                 [  0,   0, 255]], dtype=np.uint8)
        image_target = np.array([[  0,   0,  1.0],
                                 [  0, 1.0,  1.0],
                                 [1.0,   0,    0]], dtype=np.float32)
        images = np.resize([image_before], (nb_augment, 3, 3))

        # vflip=False: no image may be flipped (exactly 0%)
        nb_similar = count_flipped(ImageAugmenter(3, 3, vflip=False),
                                   images, image_target)
        self.assertEqual(nb_similar, 0)

        # vflip=True: about half of the images are flipped (~50%)
        nb_similar = count_flipped(ImageAugmenter(3, 3, vflip=True),
                                   images, image_target)
        self.assertTrue(nb_augment*0.4 < nb_similar < nb_augment*0.6)

        # vflip=0.9: probability given as float, ~90% are flipped
        nb_similar = count_flipped(ImageAugmenter(3, 3, vflip=0.9),
                                   images, image_target)
        self.assertTrue(nb_augment*0.8 < nb_similar <= nb_augment*1.0)

        # Multiple channels, every image is flipped (vflip=1.0)
        image_before = np.array([[[255, 255,   0],
                                  [255,   0,   0],
                                  [  0,   0,   0]],
                                 [[  0, 255,   0],
                                  [  0, 255,   0],
                                  [  0,   0, 255]]], dtype=np.uint8)
        image_target = np.array([[[  0,   0,   0],
                                  [1.0,   0,   0],
                                  [1.0, 1.0,   0]],
                                 [[  0,   0, 1.0],
                                  [  0, 1.0,   0],
                                  [  0, 1.0,   0]]], dtype=np.float32)
        images = np.resize([image_before], (nb_augment, 2, 3, 3))
        augmenter = ImageAugmenter(3, 3, vflip=1.0, channel_is_first_axis=True)
        nb_similar = count_flipped(augmenter, images, image_target)
        self.assertTrue(nb_augment*0.9 < nb_similar <= nb_augment*1.0)
示例#27
0
    def test_transform_channels_unequally(self):
        """Tests whether 2 or more channels can be augmented non-identically
        at the same time.

        E.g. channel 0 is rotated by 20 degrees, channel 1 (of the same image)
        is rotated by 5 degrees.

        Here an x-translation range of (0, 1) is used and it is counted how
        often each channel, and how often both channels alike, ended up
        shifted one column to the right.
        """
        nb_augment = 1000

        # two channels, channel is first axis of each image
        augmenter = ImageAugmenter(3, 3, translation_x_px=(0,1),
                                   transform_channels_equally=False,
                                   channel_is_first_axis=True)

        # indexed as [channel][y][x]
        image_before = np.array([[[255,   0,   0],
                                  [  0,   0,   0],
                                  [  0,   0,   0]],
                                 [[  0,   0,   0],
                                  [  0,   0,   0],
                                  [  0, 255,   0]]], dtype=np.uint8)
        image_target = np.array([[[  0, 1.0,   0],
                                  [  0,   0,   0],
                                  [  0,   0,   0]],
                                 [[  0,   0,   0],
                                  [  0,   0,   0],
                                  [  0,   0, 1.0]]], dtype=np.float32)

        batch = np.resize(np.array([image_before]).astype(np.uint8),
                          (nb_augment, 2, 3, 3))
        images_augmented = augmenter.augment_batch(batch)

        # count how often each channel matches its target and how often both
        # channels were treated the same way (both moved or both unmoved)
        tally = {"channel_0": 0, "channel_1": 0, "equal": 0}
        for image_after in images_augmented:
            moved_0 = np.allclose(image_target[0], image_after[0])
            moved_1 = np.allclose(image_target[1], image_after[1])
            tally["channel_0"] += 1 if moved_0 else 0
            tally["channel_1"] += 1 if moved_1 else 0
            tally["equal"] += 1 if moved_0 == moved_1 else 0

        # each one should be around 50%
        lower, upper = 0.40*nb_augment, 0.60*nb_augment
        self.assertTrue(lower < tally["channel_0"] < upper)
        self.assertTrue(lower < tally["channel_1"] < upper)
        self.assertTrue(lower < tally["equal"] < upper)
示例#28
0
def _crop_to_aspect_ratio(image, target_ratio, epsilon):
    """Remove border rows/columns until width/height is within epsilon of target_ratio.

    Rows are removed mostly from the top (three out of four removals),
    columns alternately from the left and the right. Returns the cropped
    view of ``image``.
    """
    removed = 0
    wh_ratio = image.shape[1] / float(image.shape[0])
    while not (wh_ratio - epsilon <= target_ratio <= wh_ratio + epsilon):
        if wh_ratio < target_ratio:
            # height value is too high
            # remove more from top than from bottom, because we have sky images and
            # hence much similar content at top and only a few rows of pixels with
            # different content at the bottom
            if removed % 4 != 0:
                image = image[1:, ...]  # one row at the top
            else:
                image = image[:-1, ...]  # one row at the bottom
        else:
            # width value is too high
            if removed % 2 == 0:
                image = image[:, 1:, ...]  # one column at the left
            else:
                image = image[:, :-1, ...]  # one column at the right

        wh_ratio = image.shape[1] / float(image.shape[0])
        removed += 1
    return image


def main():
    """Iterates over the images in each directory, shrinks and augments each one.

    For every (read-directory, save-to-directory) pair in DIRS each image is
    padded, augmented AUGMENTATIONS times, unpadded, cropped to
    RATIO_WIDTH_TO_HEIGHT, resized to (SCALE_HEIGHT, SCALE_WIDTH) and saved
    as "<name>__<i><extension>". Images that raise IOError are counted as
    errors and skipped.
    """
    nb_processed = 0
    nb_errors = 0
    nb_total = len(
        get_all_filepaths([download_dir for download_dir, _ in DIRS]))

    # iterate over directories (read-directory and save-to-directory)
    for download_dir, write_to_dir in DIRS:
        print("Reading from '%s'" % (download_dir, ))
        print("Writing to '%s'" % (write_to_dir, ))

        # create directory if it doesnt exist
        if not os.path.exists(write_to_dir):
            os.makedirs(write_to_dir)

        # load filepaths of images in directory
        fps_img = get_all_filepaths([download_dir])

        # iterate over each image
        for fp_img in fps_img:
            # 100.0 keeps the percentage fractional under integer division
            print("Image %d of %d (%.2f%%) (%s)" \
                  % (nb_processed+1, nb_total, 100.0*(nb_processed+1)/nb_total, fp_img))
            try:
                filename = fp_img[fp_img.rfind("/") + 1:]
                # split once so that the augmentation index can be inserted
                # in front of any extension, not only ".jp*"
                stem, extension = os.path.splitext(filename)

                # dont use misc.imload, fails for grayscale images
                image = ndimage.imread(fp_img, mode="RGB")
                height = image.shape[0]
                width = image.shape[1]
                padded_height = height + (2 * PADDING)
                padded_width = width + (2 * PADDING)

                # add padding at the borders of the image
                # then augment image
                img_padded = np.pad(image, ((PADDING, PADDING),
                                            (PADDING, PADDING), (0, 0)),
                                    mode="median")
                batch = np.zeros((AUGMENTATIONS, padded_height, padded_width, 3),
                                 dtype=np.uint8)
                batch[:] = img_padded

                ia = ImageAugmenter(padded_width,
                                    padded_height,
                                    channel_is_first_axis=False,
                                    hflip=True,
                                    vflip=False,
                                    scale_to_percent=(1.05, 1.2),
                                    scale_axis_equally=True,
                                    rotation_deg=5,
                                    shear_deg=1,
                                    translation_x_px=15,
                                    translation_y_px=15)
                batch = ia.augment_batch(batch)

                for i in range(0, AUGMENTATIONS):
                    # remove padding; explicit end indices also work for
                    # PADDING == 0, where "PADDING:-PADDING" would be empty
                    image = batch[i, PADDING:PADDING + height,
                                  PADDING:PADDING + width, ...]

                    # shrink the image to desired height/width sizes
                    # first delete rows/columns until aspect ratio matches desired aspect ratio
                    # then resize
                    # doing this after the augmentation should decrease the likelihood of
                    # ending with badly looking black areas at the borders of the image
                    # The ratio is recomputed from each augmented image, so
                    # every augmentation is cropped, not only the first one.
                    image = _crop_to_aspect_ratio(image, RATIO_WIDTH_TO_HEIGHT,
                                                  EPSILON)

                    image_resized = misc.imresize(image,
                                                  (SCALE_HEIGHT, SCALE_WIDTH))

                    # save augmented image
                    filename_aug = "%s__%d%s" % (stem, i, extension)
                    misc.imsave(os.path.join(write_to_dir, filename_aug),
                                image_resized)
            except IOError as exc:
                # sometimes downloaded images cannot be read by imread()
                # this should catch these cases
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                nb_errors += 1

            nb_processed += 1

        print("Processed %d images" % (nb_processed, ))
        print("Encountered %d errors" % (nb_errors, ))
        print("Finished.")
def main():
    """
    Main function.
    Does the following step by step:
    * Load images (from which to extract cat faces) from SOURCE_DIR
    * Initialize model (as trained via train_cat_face_locator.py)
    * Prepares images for the model (i.e. shrinks them, squares them)
    * Lets model locate cat faces in the images
    * Projects face coordinates onto original images
    * Squares the face rectangles (as we want to get square images at the end)
    * Extracts faces from images with some pixels of padding around them
    * Augments each face image several times
    * Removes the padding from each face image
    * Resizes each face image to OUT_SCALE (height, width)
    * Saves each face image (unaugmented + augmented images)

    Returns early (after printing a message) if no images were found.
    Coordinates are handled as (top-left y, top-left x, bottom-right y,
    bottom-right x) tuples throughout.
    """

    # --------------
    # load images
    # --------------
    images, paths = get_images([SOURCE_DIR])
    if len(images) == 0:
        # the debug prints below index element 0 and would raise IndexError
        print("No images found in '%s'" % (SOURCE_DIR,))
        return
    # we will use the image filenames when saving the images at the end
    images_filenames = [os.path.basename(path) for path in paths]

    # --------------
    # create model
    # --------------
    #model = create_model_tiny(MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, Adam())
    model = create_model(MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, Adam())
    load_weights_seq(model, WEIGHTS_FILEPATH)

    # --------------
    # make all images square with required sizes
    # and roll color channel to dimension index 1 (required by theano)
    # --------------
    paddings = []
    images_padded = np.zeros((len(images), MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, 3))
    for idx, image in enumerate(images):
        if idx == 0:
            print(idx, image.shape, paths[idx])
        image_padded, (pad_top, pad_right, pad_bottom, pad_left) = square_image(image)
        images_padded[idx] = misc.imresize(image_padded, (MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH))
        paddings.append((pad_top, pad_right, pad_bottom, pad_left))

    #misc.imshow(images_padded[0])

    # roll color channel
    images_padded = np.rollaxis(images_padded, 3, 1)

    # project to 0-1
    images_padded /= 255
    #print(images_padded[0])

    # --------------
    # predict positions of faces
    # --------------
    coordinates_predictions = predict_on_images(model, images_padded)

    print("[Predicted positions]", coordinates_predictions[0])
    # debugging aid: draw the predicted rectangles on the model inputs
    #for idx, (tl_y, tl_x, br_y, br_x) in enumerate(coordinates_predictions):
    #    marked_image = visualize_rectangle(images_padded[idx]*255, tl_x, br_x, tl_y, br_y,
    #                                       (255,), channel_is_first_axis=True)
    #    misc.imshow(marked_image)

    # --------------
    # project coordinates from small padded images to full-sized original images (without padding)
    # --------------
    coordinates_orig = []
    for idx, (tl_y, tl_x, br_y, br_x) in enumerate(coordinates_predictions):
        pad_top, pad_right, pad_bottom, pad_left = paddings[idx]
        height_orig = images[idx].shape[0]
        width_orig = images[idx].shape[1]
        height_full = height_orig + pad_top + pad_bottom
        width_full = width_orig + pad_right + pad_left

        tl_y_perc = tl_y / MODEL_IMAGE_HEIGHT
        tl_x_perc = tl_x / MODEL_IMAGE_WIDTH
        br_y_perc = br_y / MODEL_IMAGE_HEIGHT
        br_x_perc = br_x / MODEL_IMAGE_WIDTH

        # coordinates on full sized squared image version
        tl_y_full = int(tl_y_perc * height_full)
        tl_x_full = int(tl_x_perc * width_full)
        br_y_full = int(br_y_perc * height_full)
        br_x_full = int(br_x_perc * width_full)

        # remove paddings to get coordinates on original images
        tl_y_orig = tl_y_full - pad_top
        tl_x_orig = tl_x_full - pad_left
        br_y_orig = br_y_full - pad_top
        br_x_orig = br_x_full - pad_left

        # fix broken coordinates
        # anything below 0
        # anything above image height (y) or width (x)
        # anything where top left >= bottom right
        tl_y_orig = min(max(tl_y_orig, 0), height_orig)
        tl_x_orig = min(max(tl_x_orig, 0), width_orig)
        br_y_orig = min(max(br_y_orig, 0), height_orig)
        br_x_orig = min(max(br_x_orig, 0), width_orig)

        # Degenerate boxes become one pixel high/wide. The top-left value must
        # not drop below 0 (a negative index would wrap around when slicing),
        # so if bottom-right was clamped to 0 the box is moved to [0, 1).
        if tl_y_orig >= br_y_orig:
            tl_y_orig = max(br_y_orig - 1, 0)
            br_y_orig = tl_y_orig + 1
        if tl_x_orig >= br_x_orig:
            tl_x_orig = max(br_x_orig - 1, 0)
            br_x_orig = tl_x_orig + 1

        coordinates_orig.append((tl_y_orig, tl_x_orig, br_y_orig, br_x_orig))

    print("[Removed padding from predicted coordinates]", coordinates_orig[0])

    # --------------
    # square faces
    # --------------
    coordinates_orig_square = []
    for tl_y, tl_x, br_y, br_x in coordinates_orig:
        # we remove here instead of adding rows/cols, because that way we wont exceed the
        # image maximum sizes
        # The surplus is split between both sides; for an odd surplus the extra
        # row/column is taken from the top/left side.
        surplus = (br_y - tl_y) - (br_x - tl_x)
        if surplus > 0:
            # taller than wide: shrink vertically
            tl_y += (surplus + 1) // 2
            br_y -= surplus // 2
        elif surplus < 0:
            # wider than tall: shrink horizontally
            tl_x += (-surplus + 1) // 2
            br_x -= (-surplus) // 2
        print("New height:", (br_y-tl_y), "New width:", (br_x-tl_x))
        coordinates_orig_square.append((tl_y, tl_x, br_y, br_x))

    print("[Squared face coordinates]", coordinates_orig_square[0])

    # --------------
    # pad faces
    # --------------
    # extract "padded" faces, where the padding is part of the original image
    # (N pixels around the face)
    # After doing that, we can augment the "padded" faces, then remove the padding and have less
    # augmentation damage (i.e. areas that would otherwise be black will now be filled with parts
    # of the original image)
    faces_padded = []
    for idx, (tl_y, tl_x, br_y, br_x) in enumerate(coordinates_orig_square):
        image = images[idx]
        # we pad the whole image by N pixels so that we can safely extract an area of N pixels
        # around the face
        image_padded = np.pad(image, ((AUGMENTATION_PADDING, AUGMENTATION_PADDING),
                                      (AUGMENTATION_PADDING, AUGMENTATION_PADDING),
                                      (0, 0)), mode=str("median"))
        # in the padded image every coordinate is shifted by AUGMENTATION_PADDING, so
        # [tl, br + 2*padding) is the face plus a margin of AUGMENTATION_PADDING on each side
        face_padded = image_padded[tl_y:br_y+2*AUGMENTATION_PADDING,
                                   tl_x:br_x+2*AUGMENTATION_PADDING,
                                   ...]
        faces_padded.append(face_padded)

    print("[Extracted face with padding]")
    misc.imshow(faces_padded[0])

    # --------------
    # augment and save images
    # --------------
    for idx, face_padded in enumerate(faces_padded):
        # size of this padded face (faces were squared above, so height and width
        # are expected to be equal)
        image_height = face_padded.shape[0]
        image_width = face_padded.shape[1]
        print("[specs of padded face] height", image_height, "width", image_width)

        # augment the padded images
        ia = ImageAugmenter(image_width, image_height,
                            channel_is_first_axis=False,
                            hflip=True, vflip=False,
                            scale_to_percent=(0.90, 1.10), scale_axis_equally=True,
                            rotation_deg=45, shear_deg=0,
                            translation_x_px=8, translation_y_px=8)
        # batch of AUGMENTATION_ITERATIONS identical copies of the padded face
        images_aug = np.zeros((AUGMENTATION_ITERATIONS, image_height, image_width, 3),
                              dtype=np.uint8)
        for i in range(AUGMENTATION_ITERATIONS):
            images_aug[i, ...] = face_padded
        print("images_aug.shape", images_aug.shape)
        images_aug = ia.augment_batch(images_aug)

        # randomly change brightness of whole images
        # NOTE(review): clipping to 0.0-1.0 assumes augment_batch returns floats
        # in that range -- TODO confirm against ImageAugmenter
        for idx_aug, image_aug in enumerate(images_aug):
            by_percent = random.uniform(0.90, 1.10)
            images_aug[idx_aug] = np.clip(image_aug * by_percent, 0.0, 1.0)
        print("images_aug.shape [0]:", images_aug.shape)

        # add gaussian noise
        # skipped, because that could be added easily in torch as a layer
        #images_aug = images_aug + np.random.normal(0.0, 0.05, images_aug.shape)

        # remove the padding
        # (explicit end indices: a "P:-P" slice would be empty for a padding of 0)
        y_end = image_height - AUGMENTATION_PADDING
        x_end = image_width - AUGMENTATION_PADDING
        images_aug = images_aug[:,
                                AUGMENTATION_PADDING:y_end,
                                AUGMENTATION_PADDING:x_end,
                                ...]
        print("images_aug.shape [1]:", images_aug.shape)

        # add the unaugmented image
        face_unpadded = face_padded[AUGMENTATION_PADDING:y_end,
                                    AUGMENTATION_PADDING:x_end,
                                    ...]
        images_aug = np.vstack((images_aug, [face_unpadded]))

        print("images_aug.shape [2]:", images_aug.shape)

        # save images
        for i, image_aug in enumerate(images_aug):
            area = image_aug.shape[0] * image_aug.shape[1]
            if area < MINIMUM_AREA:
                print("Ignoring image %d / %d because it is too small (area of %d vs min. %d)" \
                       % (idx, i, area, MINIMUM_AREA))
            else:
                image_resized = misc.imresize(image_aug, (OUT_SCALE, OUT_SCALE))
                filename_aug = "%s_%d.jpg" % (images_filenames[idx].replace(".jpg", ""), i)
                #misc.imshow(image_resized)
                misc.imsave(os.path.join(TARGET_DIR, filename_aug), image_resized)
def main():
    """Iterates over the images in each directory, shrinks and augments each one.

    For every (download_dir, write_to_dir) pair in DIRS, each image is read as
    RGB, padded by PADDING pixels per side (median fill), copied AUGMENTATIONS
    times into one batch and augmented once with ImageAugmenter. The padding is
    then cut off again, each augmented image is resized to
    (SCALE_HEIGHT, SCALE_WIDTH) and saved to write_to_dir as
    "<name>__<i>.jp..." (the index is inserted in front of ".jp").

    Images that raise IOError while being read or written are counted as
    errors and skipped.
    """
    nb_processed = 0
    nb_errors = 0
    nb_total = len(
        get_all_filepaths(
            [download_dir for download_dir, write_to_dir in DIRS]))

    # iterate over directories (read-directory and save-to-directory)
    for download_dir, write_to_dir in DIRS:
        print("Reading from '%s'" % (download_dir, ))
        print("Writing to '%s'" % (write_to_dir, ))

        # create directory if it doesnt exist
        if not os.path.exists(write_to_dir):
            os.makedirs(write_to_dir)

        # load filepaths of images in directory
        fps_img = get_all_filepaths([download_dir])

        # iterate over each image
        for fp_img in fps_img:
            print("Image %d of %d (%.2f%%) (%s)" \
                  % (nb_processed+1, nb_total, 100*(nb_processed+1)/nb_total, fp_img))
            try:
                filename = os.path.basename(fp_img)
                # dont use misc.imload, fails for grayscale images
                image = ndimage.imread(fp_img, mode="RGB")

                height = image.shape[0]
                width = image.shape[1]
                padded_height = height + (2 * PADDING)
                padded_width = width + (2 * PADDING)

                # add padding at the borders of the image so that augmentation
                # pulls in image-like content instead of empty borders
                img_padded = np.pad(image, ((PADDING, PADDING),
                                            (PADDING, PADDING), (0, 0)),
                                    mode="median")

                # batch of identical padded copies, augmented in a single call
                batch = np.zeros((AUGMENTATIONS, padded_height, padded_width, 3),
                                 dtype=np.uint8)
                batch[...] = img_padded

                ia = ImageAugmenter(padded_width,
                                    padded_height,
                                    channel_is_first_axis=False,
                                    hflip=True,
                                    vflip=False,
                                    scale_to_percent=(1.05, 1.2),
                                    scale_axis_equally=True,
                                    rotation_deg=5,
                                    shear_deg=1,
                                    translation_x_px=15,
                                    translation_y_px=15)
                batch_aug = ia.augment_batch(batch)

                for i in range(0, AUGMENTATIONS):
                    # cut the padding off again (explicit end indices so that
                    # a PADDING of 0 also works), then shrink the augmented
                    # image -- not the unaugmented original
                    image_aug = batch_aug[i,
                                          PADDING:PADDING + height,
                                          PADDING:PADDING + width,
                                          ...]
                    image_resized = misc.imresize(
                        image_aug, (SCALE_HEIGHT, SCALE_WIDTH))
                    # save augmented image
                    filename_aug = filename.replace(".jp", "__%d.jp" % (i))
                    misc.imsave(os.path.join(write_to_dir, filename_aug),
                                image_resized)
            except IOError as exc:
                # sometimes downloaded images cannot be read by imread()
                # this should catch these cases
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                nb_errors += 1

            nb_processed += 1

        print("Processed %d images" % (nb_processed, ))
        print("Encountered %d errors" % (nb_errors, ))
        print("Finished.")