Example No. 1
def mean_filter2d(
    image: TensorLike,
    filter_shape: Union[List[int], Tuple[int], int] = [3, 3],
    padding: str = "REFLECT",
    constant_values: TensorLike = 0,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Perform mean filtering on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D mean filter. Can be a single integer
        to specify the same value for all spatial dimensions.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).
    Returns:
      2-D, 3-D or 4-D `Tensor` of the same dtype as input.
    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        or if `filter_shape` is invalid.
    """
    with tf.name_scope(name or "mean_filter2d"):
        image = tf.convert_to_tensor(image, name="image")
        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)

        filter_shape = keras_utils.normalize_tuple(filter_shape, 2, "filter_shape")

        # Keep the precision if it's float;
        # otherwise, convert to float32 for computing.
        orig_dtype = image.dtype
        if not image.dtype.is_floating:
            image = tf.dtypes.cast(image, tf.dtypes.float32)

        # Explicitly pad the image
        image = _pad(image, filter_shape, mode=padding, constant_values=constant_values)

        # Filter of shape (filter_width, filter_height, in_channels, 1)
        # has the value of 1 for each element.
        area = tf.constant(filter_shape[0] * filter_shape[1], dtype=image.dtype)
        filter_shape += (tf.shape(image)[-1], 1)
        kernel = tf.ones(shape=filter_shape, dtype=image.dtype)

        output = tf.nn.depthwise_conv2d(
            image, kernel, strides=(1, 1, 1, 1), padding="VALID"
        )

        output /= area

        output = img_utils.from_4D_image(output, original_ndims)
        return tf.dtypes.cast(output, orig_dtype)
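
These snippets appear to come from TensorFlow Addons (`tfa.image`). A minimal usage sketch for the mean filter, assuming `tensorflow_addons` is installed and exposes this function as `tfa.image.mean_filter2d`:

import numpy as np
import tensorflow_addons as tfa

# A noisy 8x8 grayscale image in HW layout, uint8.
image = np.random.randint(0, 256, size=(8, 8), dtype=np.uint8)

# 3x3 mean filter. The non-float input is computed in float32 internally
# and cast back, so the output is uint8 with the same (8, 8) shape.
smoothed = tfa.image.mean_filter2d(image, filter_shape=3, padding="REFLECT")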
Example No. 2
def rotate(
    images: TensorLike,
    angles: TensorLike,
    interpolation: str = "nearest",
    fill_mode: str = "constant",
    name: Optional[str] = None,
    fill_value: TensorLike = 0.0,
) -> tf.Tensor:
    """Rotate image(s) counterclockwise by the passed angle(s) in radians.

    Args:
      images: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)`
        (NHWC), `(num_rows, num_columns, num_channels)` (HWC), or
        `(num_rows, num_columns)` (HW).
      angles: A scalar angle to rotate all images by, or (if `images` has rank 4)
        a vector of length num_images, with an angle for each image in the
        batch.
      interpolation: Interpolation mode. Supported values: "nearest",
        "bilinear".
      fill_mode: Points outside the boundaries of the input are filled according
        to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
        - *reflect*: `(d c b a | a b c d | d c b a)`
          The input is extended by reflecting about the edge of the last pixel.
        - *constant*: `(k k k k | a b c d | k k k k)`
          The input is extended by filling all values beyond the edge with the
          same constant value k = 0.
        - *wrap*: `(a b c d | a b c d | a b c d)`
          The input is extended by wrapping around to the opposite edge.
        - *nearest*: `(a a a a | a b c d | d d d d)`
          The input is extended by the nearest pixel.
      fill_value: a float representing the value to be filled outside the
        boundaries when `fill_mode` is "constant".
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `images`, rotated by the given
      angle(s). Empty space due to the rotation will be filled according to
      `fill_mode` and `fill_value` (zeros for the default "constant" mode).

    Raises:
      TypeError: If `images` is an invalid type.
    """
    with tf.name_scope(name or "rotate"):
        image_or_images = tf.convert_to_tensor(images)
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        image_height = tf.cast(tf.shape(images)[1], tf.dtypes.float32)[None]
        image_width = tf.cast(tf.shape(images)[2], tf.dtypes.float32)[None]
        output = transform(
            images,
            angles_to_projective_transforms(angles, image_height, image_width),
            interpolation=interpolation,
            fill_mode=fill_mode,
            fill_value=fill_value,
        )
        return img_utils.from_4D_image(output, original_ndims)
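
A brief sketch of this newer `rotate` signature, assuming it is exposed as `tfa.image.rotate`; the `fill_mode`/`fill_value` arguments only take full effect on TensorFlow >= 2.4 (see the `transform` branch in Example No. 7):

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

# Batch of two 16x16 RGB images (NHWC).
images = tf.random.uniform((2, 16, 16, 3))

# Rotate the first image by 45 and the second by 90 degrees
# (counterclockwise, in radians). Pixels uncovered by the rotation are
# filled with 0.5 because fill_mode="constant".
angles = tf.constant([np.pi / 4, np.pi / 2])
rotated = tfa.image.rotate(
    images, angles, interpolation="bilinear",
    fill_mode="constant", fill_value=0.5)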
Example No. 3
def euclidean_dist_transform(
    images: TensorLike,
    dtype: Type[tf.dtypes.DType] = tf.float32,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Applies euclidean distance transform(s) to the image(s).

    Args:
      images: A tensor of shape `(num_images, num_rows, num_columns, 1)`
        (NHWC), or `(num_rows, num_columns, 1)` (HWC) or
        `(num_rows, num_columns)` (HW).
      dtype: `tf.dtypes.DType` of the output tensor.
      name: The name of the op.

    Returns:
      Image(s) with the type `dtype` and same shape as `images`, with the
      transform applied. If a tensor of all ones is given as input, the
      output tensor will be filled with the max value of the `dtype`.

    Raises:
      TypeError: If `image` is not tf.uint8, or `dtype` is not floating point.
      ValueError: If `image` has more than one channel, or `image` is not of
        rank between 2 and 4.
    """

    with tf.name_scope(name or "euclidean_distance_transform"):
        image_or_images = tf.convert_to_tensor(images, name="images")

        if image_or_images.dtype.base_dtype != tf.uint8:
            raise TypeError("Invalid dtype %s. Expected uint8." %
                            image_or_images.dtype)

        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        if images.get_shape()[3] is not None and images.get_shape()[3] != 1:
            raise ValueError("`images` must have only one channel")

        if dtype not in [tf.float16, tf.float32, tf.float64]:
            raise TypeError("`dtype` must be float16, float32 or float64")

        images = tf.cast(images, dtype)
        output = _image_so.ops.addons_euclidean_distance_transform(images)

        return img_utils.from_4D_image(output, original_ndims)
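
A small sketch, assuming the op is exposed as `tfa.image.euclidean_dist_transform`; this version requires a single-channel `tf.uint8` input:

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

# Single-channel mask (HWC): one zero "seed" pixel in the center, ones
# elsewhere. Each nonzero pixel is replaced by its euclidean distance
# to the nearest zero pixel.
mask = np.ones((5, 5, 1), dtype=np.uint8)
mask[2, 2, 0] = 0
dist = tfa.image.euclidean_dist_transform(mask, dtype=tf.float32)
print(dist[2, 2, 0].numpy())  # 0.0 at the seed
print(dist[0, 0, 0].numpy())  # sqrt(8) ~ 2.83 at the corner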
Example No. 4
def euclidean_dist_transform(
    images: TensorLike,
    dtype: Type[tf.dtypes.DType] = tf.float32,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Applies euclidean distance transform(s) to the image(s).

    Based on [Distance Transforms of Sampled Functions]
    (http://www.theoryofcomputing.org/articles/v008a019/v008a019.pdf).

    Args:
      images: A tensor of shape `(num_images, num_rows, num_columns, num_channels)`
        (NHWC), or `(num_rows, num_columns, num_channels)` (HWC) or
        `(num_rows, num_columns)` (HW).
      dtype: `tf.dtypes.DType` of the output tensor.
      name: The name of the op.

    Returns:
      Image(s) with the type `dtype` and same shape as `images`, with the
      transform applied. If a tensor of all ones is given as input, the
      output tensor will be filled with the max value of the `dtype`.

    Raises:
      TypeError: If `image` is not tf.uint8, or `dtype` is not floating point.
    """

    with tf.name_scope(name or "euclidean_distance_transform"):
        image_or_images = tf.convert_to_tensor(images, name="images")

        if image_or_images.dtype.base_dtype != tf.uint8:
            raise TypeError("Invalid dtype %s. Expected uint8." %
                            image_or_images.dtype)

        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        if dtype not in [tf.float16, tf.float32, tf.float64]:
            raise TypeError("`dtype` must be float16, float32 or float64")

        output = _image_so.ops.addons_euclidean_distance_transform(
            images, dtype)

        return img_utils.from_4D_image(output, original_ndims)
Example No. 5
def rotate(
    images: TensorLike,
    angles: TensorLike,
    interpolation: str = "NEAREST",
    name: Optional[str] = None,
) -> tf.Tensor:
    """Rotate image(s) counterclockwise by the passed angle(s) in radians.

    Args:
      images: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)`
        (NHWC), `(num_rows, num_columns, num_channels)` (HWC), or
        `(num_rows, num_columns)` (HW).
      angles: A scalar angle to rotate all images by, or (if `images` has rank 4)
        a vector of length num_images, with an angle for each image in the
        batch.
      interpolation: Interpolation mode. Supported values: "NEAREST",
        "BILINEAR".
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `images`, rotated by the given
      angle(s). Empty space due to the rotation will be filled with zeros.

    Raises:
      TypeError: If `images` is an invalid type.
    """
    with tf.name_scope(name or "rotate"):
        image_or_images = tf.convert_to_tensor(images)
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        image_height = tf.cast(tf.shape(images)[1], tf.dtypes.float32)[None]
        image_width = tf.cast(tf.shape(images)[2], tf.dtypes.float32)[None]
        output = transform(
            images,
            angles_to_projective_transforms(angles, image_height, image_width),
            interpolation=interpolation,
        )
        return img_utils.from_4D_image(output, original_ndims)
Example No. 6
def transform(
    images: TensorLike,
    transforms: TensorLike,
    interpolation: str = "NEAREST",
    output_shape: Optional[list] = None,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Applies the given transform(s) to the image(s).

    Args:
      images: A tensor of shape (num_images, num_rows, num_columns,
        num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
        (num_rows, num_columns) (HW).
      transforms: Projective transform matrix/matrices. A vector of length 8 or
        tensor of size N x 8. If one row of transforms is
        [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
        where `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to
        the transform mapping input points to output points. Note that
        gradients are not backpropagated into transformation parameters.
      interpolation: Interpolation mode.
        Supported values: "NEAREST", "BILINEAR".
      output_shape: Output dimension after the transform, [height, width].
        If None, output is the same size as input image.
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `images`, with the given
      transform(s) applied. Transformed coordinates outside of the input image
      will be filled with zeros.

    Raises:
      TypeError: If `image` is an invalid type.
      ValueError: If output shape is not a 1-D int32 Tensor.
    """
    with tf.name_scope(name or "transform"):
        image_or_images = tf.convert_to_tensor(images, name="images")
        transform_or_transforms = tf.convert_to_tensor(transforms,
                                                       name="transforms",
                                                       dtype=tf.dtypes.float32)
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        if output_shape is None:
            output_shape = tf.shape(images)[1:3]

        output_shape = tf.convert_to_tensor(output_shape,
                                            tf.dtypes.int32,
                                            name="output_shape")

        if not output_shape.get_shape().is_compatible_with([2]):
            raise ValueError(
                "output_shape must be a 1-D Tensor of 2 elements: "
                "new_height, new_width")

        if len(transform_or_transforms.get_shape()) == 1:
            transforms = transform_or_transforms[None]
        elif transform_or_transforms.get_shape().ndims is None:
            raise ValueError("transforms rank must be statically known")
        elif len(transform_or_transforms.get_shape()) == 2:
            transforms = transform_or_transforms
        else:
            raise ValueError(
                "transforms should have rank 1 or 2, but got rank %d" %
                len(transform_or_transforms.get_shape()))

        output = tf.raw_ops.ImageProjectiveTransformV2(
            images=images,
            transforms=transforms,
            output_shape=output_shape,
            interpolation=interpolation.upper(),
        )
        return img_utils.from_4D_image(output, original_ndims)
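
To make the 8-parameter format concrete: shifting the image content right by `dx` and down by `dy` means each *output* pixel `(x, y)` samples the *input* at `(x - dx, y - dy)`, so `a2 = -dx` and `b2 = -dy`. A sketch assuming `tfa.image.transform`:

import tensorflow as tf
import tensorflow_addons as tfa

image = tf.random.uniform((8, 8, 3))

# [a0, a1, a2, b0, b1, b2, c0, c1]: identity plus a translation of
# (dx, dy) = (2, 1). The offsets are negated because the matrix maps
# output points back to input points.
translate = [1.0, 0.0, -2.0, 0.0, 1.0, -1.0, 0.0, 0.0]
shifted = tfa.image.transform(image, translate, interpolation="NEAREST")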
Example No. 7
def transform(
    images: TensorLike,
    transforms: TensorLike,
    interpolation: str = "nearest",
    fill_mode: str = "constant",
    output_shape: Optional[list] = None,
    name: Optional[str] = None,
    fill_value: TensorLike = 0.0,
) -> tf.Tensor:
    """Applies the given transform(s) to the image(s).

    Args:
      images: A tensor of shape (num_images, num_rows, num_columns,
        num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
        (num_rows, num_columns) (HW).
      transforms: Projective transform matrix/matrices. A vector of length 8 or
        tensor of size N x 8. If one row of transforms is
        [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
        where `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to
        the transform mapping input points to output points. Note that
        gradients are not backpropagated into transformation parameters.
      interpolation: Interpolation mode.
        Supported values: "nearest", "bilinear".
      fill_mode: Points outside the boundaries of the input are filled according
        to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
        - *reflect*: `(d c b a | a b c d | d c b a)`
          The input is extended by reflecting about the edge of the last pixel.
        - *constant*: `(k k k k | a b c d | k k k k)`
          The input is extended by filling all values beyond the edge with the
          same constant value k = 0.
        - *wrap*: `(a b c d | a b c d | a b c d)`
          The input is extended by wrapping around to the opposite edge.
        - *nearest*: `(a a a a | a b c d | d d d d)`
          The input is extended by the nearest pixel.
      fill_value: a float representing the value to be filled outside the
        boundaries when `fill_mode` is "constant".
      output_shape: Output dimension after the transform, [height, width].
        If None, output is the same size as input image.
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `images`, with the given
      transform(s) applied. Transformed coordinates outside of the input image
      will be filled with zeros.

    Raises:
      TypeError: If `image` is an invalid type.
      ValueError: If output shape is not a 1-D int32 Tensor.
    """
    with tf.name_scope(name or "transform"):
        image_or_images = tf.convert_to_tensor(images, name="images")
        transform_or_transforms = tf.convert_to_tensor(transforms,
                                                       name="transforms",
                                                       dtype=tf.dtypes.float32)
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        if output_shape is None:
            output_shape = tf.shape(images)[1:3]

        output_shape = tf.convert_to_tensor(output_shape,
                                            tf.dtypes.int32,
                                            name="output_shape")

        if not output_shape.get_shape().is_compatible_with([2]):
            raise ValueError(
                "output_shape must be a 1-D Tensor of 2 elements: "
                "new_height, new_width")

        if len(transform_or_transforms.get_shape()) == 1:
            transforms = transform_or_transforms[None]
        elif transform_or_transforms.get_shape().ndims is None:
            raise ValueError("transforms rank must be statically known")
        elif len(transform_or_transforms.get_shape()) == 2:
            transforms = transform_or_transforms
        else:
            raise ValueError(
                "transforms should have rank 1 or 2, but got rank %d" %
                len(transform_or_transforms.get_shape()))

        if LooseVersion(tf.__version__) >= LooseVersion("2.4.0"):
            fill_value = tf.convert_to_tensor(fill_value,
                                              dtype=tf.float32,
                                              name="fill_value")
            output = tf.raw_ops.ImageProjectiveTransformV3(
                images=images,
                transforms=transforms,
                output_shape=output_shape,
                interpolation=interpolation.upper(),
                fill_mode=fill_mode.upper(),
                fill_value=fill_value,
            )
        else:
            fill_mode = fill_mode.upper()
            # TODO(WindQAQ): Get rid of the check once we drop TensorFlow < 2.4 support.
            if fill_mode == "CONSTANT":
                warnings.warn(
                    "fill_value is not supported and is always 0 for TensorFlow < 2.4.0."
                )
            if fill_mode == "NEAREST":
                raise ValueError(
                    "NEAREST fill_mode is not supported for TensorFlow < 2.4.0."
                )
            output = tf.raw_ops.ImageProjectiveTransformV2(
                images=images,
                transforms=transforms,
                output_shape=output_shape,
                interpolation=interpolation.upper(),
                fill_mode=fill_mode,
            )
        return img_utils.from_4D_image(output, original_ndims)
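
The TensorFlow >= 2.4 branch adds `fill_mode` and `fill_value`. A short sketch of filling the uncovered border with a constant other than zero, again assuming `tfa.image.transform`:

import tensorflow as tf
import tensorflow_addons as tfa

image = tf.random.uniform((8, 8, 1))

# Shift content 3 pixels right; the 3 uncovered columns on the left are
# filled with 1.0 instead of the default 0 (requires TensorFlow >= 2.4).
translate = [1.0, 0.0, -3.0, 0.0, 1.0, 0.0, 0.0, 0.0]
out = tfa.image.transform(
    image, translate, interpolation="nearest",
    fill_mode="constant", fill_value=1.0)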
Example No. 8
def median_filter2d(image,
                    filter_shape=(3, 3),
                    padding="REFLECT",
                    constant_values=0,
                    name=None):
    """Perform median filtering on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D median filter. Can be a single integer
        to specify the same value for all spatial dimensions.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).
    Returns:
      2-D, 3-D or 4-D `Tensor` of the same dtype as input.
    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        or if `filter_shape` is invalid.
    """
    with tf.name_scope(name or "median_filter2d"):
        image = tf.convert_to_tensor(image, name="image")
        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)

        if padding not in ["REFLECT", "CONSTANT", "SYMMETRIC"]:
            raise ValueError(
                "padding should be one of \"REFLECT\", \"CONSTANT\", or "
                "\"SYMMETRIC\".")

        filter_shape = keras_utils.normalize_tuple(filter_shape, 2,
                                                   "filter_shape")

        image_shape = tf.shape(image)
        batch_size = image_shape[0]
        height = image_shape[1]
        width = image_shape[2]
        channels = image_shape[3]

        # Explicitly pad the image
        image = _pad(image,
                     filter_shape,
                     mode=padding,
                     constant_values=constant_values)

        area = filter_shape[0] * filter_shape[1]

        floor = (area + 1) // 2
        ceil = area // 2 + 1

        patches = tf.image.extract_patches(
            image,
            sizes=[1, filter_shape[0], filter_shape[1], 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding="VALID")

        patches = tf.reshape(patches,
                             shape=[batch_size, height, width, area, channels])

        patches = tf.transpose(patches, [0, 1, 2, 4, 3])

        # Note the returned median is cast back to the original type.
        # Take [5, 6, 7, 8] for example: the median is (6 + 7) / 2 = 6.5,
        # which truncates to int(6.5) = 6 if the original type is int.
        top = tf.nn.top_k(patches, k=ceil).values
        if area % 2 == 1:
            median = top[:, :, :, :, floor - 1]
        else:
            median = (top[:, :, :, :, floor - 1] +
                      top[:, :, :, :, ceil - 1]) / 2

        output = tf.cast(median, image.dtype)
        output = img_utils.from_4D_image(output, original_ndims)
        return output
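
A usage sketch, assuming `tfa.image.median_filter2d`. With an odd filter area (here 3 * 3 = 9) the median is an exact sample value, so the integer-truncation caveat from the comment above does not apply:

import numpy as np
import tensorflow_addons as tfa

# Image with salt-and-pepper style noise.
image = np.random.randint(0, 256, size=(10, 10, 1), dtype=np.uint8)

# 3x3 median filter; output has the same shape and dtype as the input.
denoised = tfa.image.median_filter2d(image, filter_shape=(3, 3))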
Example No. 9
def gaussian_filter2d(
    image: TensorLike,
    filter_shape: Union[List[int], Tuple[int], int] = [3, 3],
    sigma: Union[List[float], Tuple[float], float] = 1.0,
    padding: str = "REFLECT",
    constant_values: TensorLike = 0,
    name: Optional[str] = None,
) -> TensorLike:
    """Perform Gaussian blur on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D gaussian filter. Can be a single
        integer to specify the same value for all spatial dimensions.
      sigma: A `float` or `tuple`/`list` of 2 floats, specifying
        the standard deviations of the 2-D gaussian filter along the height
        and width directions, respectively. Can be a single float to specify
        the same value for all spatial dimensions.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).
    Returns:
      2-D, 3-D or 4-D `Tensor` of the same dtype as input.
    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        if `filter_shape` is invalid,
        or if `sigma` is invalid.
    """
    with tf.name_scope(name or "gaussian_filter2d"):
        if isinstance(sigma, (list, tuple)):
            if len(sigma) != 2:
                raise ValueError(
                    "sigma should be a float or a tuple/list of 2 floats")
        else:
            sigma = (sigma, ) * 2

        if any(s < 0 for s in sigma):
            raise ValueError("sigma should be greater than or equal to 0.")

        image = tf.convert_to_tensor(image, name="image")
        sigma = tf.convert_to_tensor(sigma, name="sigma")

        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)

        # Keep the precision if it's float;
        # otherwise, convert to float32 for computing.
        orig_dtype = image.dtype
        if not image.dtype.is_floating:
            image = tf.cast(image, tf.float32)

        channels = tf.shape(image)[3]
        filter_shape = keras_utils.normalize_tuple(filter_shape, 2,
                                                   "filter_shape")

        sigma = tf.cast(sigma, image.dtype)
        gaussian_kernel_x = _get_gaussian_kernel(sigma[1], filter_shape[1])
        gaussian_kernel_x = gaussian_kernel_x[tf.newaxis, :]

        gaussian_kernel_y = _get_gaussian_kernel(sigma[0], filter_shape[0])
        gaussian_kernel_y = gaussian_kernel_y[:, tf.newaxis]

        gaussian_kernel_2d = _get_gaussian_kernel_2d(gaussian_kernel_y,
                                                     gaussian_kernel_x)
        gaussian_kernel_2d = gaussian_kernel_2d[:, :, tf.newaxis, tf.newaxis]
        gaussian_kernel_2d = tf.tile(gaussian_kernel_2d, [1, 1, channels, 1])

        image = _pad(image,
                     filter_shape,
                     mode=padding,
                     constant_values=constant_values)

        output = tf.nn.depthwise_conv2d(
            input=image,
            filter=gaussian_kernel_2d,
            strides=(1, 1, 1, 1),
            padding="VALID",
        )
        output = img_utils.from_4D_image(output, original_ndims)
        return tf.cast(output, orig_dtype)
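
A sketch of this newer per-axis-`sigma` signature, assuming it is exposed as `tfa.image.gaussian_filter2d`:

import tensorflow as tf
import tensorflow_addons as tfa

image = tf.random.uniform((1, 32, 32, 3))  # NHWC batch of one

# 5x5 kernel with a stronger blur along y: sigma is (sigma_y, sigma_x),
# matching the (height, width) order of filter_shape.
blurred = tfa.image.gaussian_filter2d(
    image, filter_shape=(5, 5), sigma=(2.0, 1.0))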
Example No. 10
def sparse_image_warp(
    image: TensorLike,
    source_control_point_locations: TensorLike,
    dest_control_point_locations: TensorLike,
    interpolation_order: int = 2,
    regularization_weight: FloatTensorLike = 0.0,
    num_boundary_points: int = 0,
    name: str = "sparse_image_warp",
) -> tf.Tensor:
    """Image warping using correspondences between sparse control points.

    Apply a non-linear warp to the image, where the warp is specified by
    the source and destination locations of a (potentially small) number of
    control points. First, we use a polyharmonic spline
    (`tfa.image.interpolate_spline`) to interpolate the displacements
    between the corresponding control points to a dense flow field.
    Then, we warp the image using this dense flow field
    (`tfa.image.dense_image_warp`).

    Let t index our control points. For `regularization_weight = 0`, we have:
    warped_image[b, dest_control_point_locations[b, t, 0],
                    dest_control_point_locations[b, t, 1], :] =
    image[b, source_control_point_locations[b, t, 0],
             source_control_point_locations[b, t, 1], :].

    For `regularization_weight > 0`, this condition is met approximately, since
    regularized interpolation trades off smoothness of the interpolant vs.
    reconstruction of the interpolant at the control points.
    See `tfa.image.interpolate_spline` for further documentation of the
    `interpolation_order` and `regularization_weight` arguments.


    Args:
      image: Either a 2-D float `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
        `batch_size` is assumed as one when `image` is a 2-D or 3-D `Tensor`.
      source_control_point_locations: `[batch_size, num_control_points, 2]` float
        `Tensor`.
      dest_control_point_locations: `[batch_size, num_control_points, 2]` float
        `Tensor`.
      interpolation_order: polynomial order used by the spline interpolation
      regularization_weight: weight on smoothness regularizer in interpolation
      num_boundary_points: How many zero-flow boundary points to include at
        each image edge. Usage:
        - `num_boundary_points=0`: don't add zero-flow points
        - `num_boundary_points=1`: 4 corners of the image
        - `num_boundary_points=2`: 4 corners and one in the middle of each edge
          (8 points total)
        - `num_boundary_points=n`: 4 corners and n-1 along each edge
      name: A name for the operation (optional).

    Note that `image` and `offsets` can be of type `tf.half`, `tf.float32`,
    or `tf.float64`, and do not necessarily have to be the same type.

    Returns:
      warped_image: a float `Tensor` with the same shape and dtype as `image`.
      flow_field: `[batch_size, height, width, 2]` float `Tensor` containing the
        dense flow field produced by the interpolation.
    """

    image = tf.convert_to_tensor(image)
    original_ndims = img_utils.get_ndims(image)
    image = img_utils.to_4D_image(image)

    source_control_point_locations = tf.convert_to_tensor(
        source_control_point_locations)
    dest_control_point_locations = tf.convert_to_tensor(
        dest_control_point_locations)

    control_point_flows = dest_control_point_locations - source_control_point_locations

    clamp_boundaries = num_boundary_points > 0
    boundary_points_per_edge = num_boundary_points - 1

    with tf.name_scope(name or "sparse_image_warp"):
        image_shape = tf.shape(image)
        batch_size, image_height, image_width = (
            image_shape[0],
            image_shape[1],
            image_shape[2],
        )

        # This generates the dense locations where the interpolant
        # will be evaluated.
        grid_locations = _get_grid_locations(image_height, image_width)

        flattened_grid_locations = tf.reshape(grid_locations,
                                              [image_height * image_width, 2])

        flattened_grid_locations = tf.cast(
            _expand_to_minibatch(flattened_grid_locations, batch_size),
            image.dtype)

        if clamp_boundaries:
            (
                dest_control_point_locations,
                control_point_flows,
            ) = _add_zero_flow_controls_at_boundary(
                dest_control_point_locations,
                control_point_flows,
                image_height,
                image_width,
                boundary_points_per_edge,
            )

        flattened_flows = interpolate_spline(
            dest_control_point_locations,
            control_point_flows,
            flattened_grid_locations,
            interpolation_order,
            regularization_weight,
        )

        dense_flows = tf.reshape(flattened_flows,
                                 [batch_size, image_height, image_width, 2])

        warped_image = dense_image_warp(image, dense_flows)

        return img_utils.from_4D_image(warped_image,
                                       original_ndims), dense_flows
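
A sketch assuming `tfa.image.sparse_image_warp`; note that, unlike the other ops here, it returns a tuple of the warped image and the dense flow field:

import tensorflow as tf
import tensorflow_addons as tfa

image = tf.random.uniform((1, 32, 32, 1))  # NHWC batch of one

# Pull the pixel at (row=8, col=8) toward (16, 16), with zero-flow
# points at the 4 corners (num_boundary_points=1) to pin the borders.
src = tf.constant([[[8.0, 8.0]]])    # [batch, num_control_points, 2]
dst = tf.constant([[[16.0, 16.0]]])
warped, flow = tfa.image.sparse_image_warp(
    image, src, dst, num_boundary_points=1)
print(warped.shape, flow.shape)  # (1, 32, 32, 1) (1, 32, 32, 2)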
Example No. 11
def gaussian_filter2d(
    image: FloatTensorLike,
    filter_shape: Union[List[int], Tuple[int]] = [3, 3],
    sigma: FloatTensorLike = 1,
    padding: str = "REFLECT",
    constant_values: TensorLike = 0,
    name: Optional[str] = None,
) -> FloatTensorLike:
    """Perform Gaussian blur on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D gaussian filter. Can be a single
        integer to specify the same value for all spatial dimensions.
      sigma: Standard deviation of Gaussian.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).
    Returns:
      2-D, 3-D or 4-D `Tensor` of dtype `tf.float32` (this version casts the
      input to `float32` for filtering and does not cast back).
    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        if `filter_shape` is invalid,
        or if `sigma` is less than or equal to 0.
    """
    with tf.name_scope(name or "gaussian_filter2d"):
        if sigma <= 0:
            raise ValueError("sigma should be greater than 0.")
        if padding not in ["REFLECT", "CONSTANT", "SYMMETRIC"]:
            raise ValueError(
                "padding should be REFLECT, CONSTANT, or SYMMETRIC.")

        image = tf.cast(image, tf.float32)
        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)
        channels = tf.shape(image)[3]
        filter_shape = keras_utils.normalize_tuple(filter_shape, 2,
                                                   "filter_shape")

        gaussian_filter_x = _get_gaussian_kernel(sigma, filter_shape[1])
        gaussian_filter_x = tf.cast(gaussian_filter_x, tf.float32)
        gaussian_filter_x = tf.reshape(gaussian_filter_x, [1, filter_shape[1]])

        gaussian_filter_y = _get_gaussian_kernel(sigma, filter_shape[0])
        gaussian_filter_y = tf.reshape(gaussian_filter_y, [filter_shape[0], 1])
        gaussian_filter_y = tf.cast(gaussian_filter_y, tf.float32)

        gaussian_filter_2d = _get_gaussian_kernel_2d(gaussian_filter_y,
                                                     gaussian_filter_x)
        gaussian_filter_2d = tf.repeat(gaussian_filter_2d, channels)
        gaussian_filter_2d = tf.reshape(
            gaussian_filter_2d,
            [filter_shape[0], filter_shape[1], channels, 1])

        image = _pad(
            image,
            filter_shape,
            mode=padding,
            constant_values=constant_values,
        )

        output = tf.nn.depthwise_conv2d(
            input=image,
            filter=gaussian_filter_2d,
            strides=(1, 1, 1, 1),
            padding="VALID",
        )
        output = img_utils.from_4D_image(output, original_ndims)
        return output