Example #1
    def forward(self, input_image):
        """Forward pass."""
        input_data = np.pad(input_image, self.padding, mode='constant')
        input_data = self.preprocess(input_data)
        input_shape = (1, ) + input_data.shape
        input_data = input_data.reshape(input_shape)
        input_tensor = tf.convert_to_tensor(input_data, dtype=tf.float32)

        # Rotate input.
        pivot = np.array(input_data.shape[1:3]) / 2
        rvecs = self.get_se2(self.num_rotations, pivot)
        input_tensor = tf.repeat(input_tensor,
                                 repeats=self.num_rotations,
                                 axis=0)
        input_tensor = tfa_image.transform(input_tensor,
                                           rvecs,
                                           interpolation='NEAREST')

        # Forward pass.
        input_tensor = tf.split(input_tensor, self.num_rotations)
        logits = ()
        for x in input_tensor:
            logits += (self.model(x), )
        logits = tf.concat(logits, axis=0)

        # Rotate back output.
        rvecs = self.get_se2(self.num_rotations, pivot, reverse=True)
        logits = tfa_image.transform(logits, rvecs, interpolation='NEAREST')
        c0 = self.padding[:2, 0]
        c1 = c0 + input_image.shape[:2]
        output = logits[:, c0[0]:c1[0], c0[1]:c1[1], :]
        return output
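The `get_se2` helper is not shown above. A minimal sketch of what it could return, assuming one flat 8-parameter projective transform per rotation (the output-to-input convention that `tfa_image.transform` expects), with evenly spaced angles about an (x, y) pivot:

import numpy as np

def get_se2_sketch(num_rotations, pivot):
    """Hypothetical stand-in for get_se2; angle spacing and pivot order are assumptions."""
    rvecs = []
    for i in range(num_rotations):
        theta = i * 2 * np.pi / num_rotations
        c, s = np.cos(theta), np.sin(theta)
        # Translate the pivot to the origin, rotate, translate back.
        tx = pivot[0] - c * pivot[0] + s * pivot[1]
        ty = pivot[1] - s * pivot[0] - c * pivot[1]
        rvecs.append([c, -s, tx, s, c, ty, 0.0, 0.0])
    return np.array(rvecs, dtype=np.float32)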
Example #2
  def forward(self, in_img, softmax=True):
    """Forward pass."""
    in_data = np.pad(in_img, self.padding, mode='constant')
    in_data = self.preprocess(in_data)
    in_shape = (1,) + in_data.shape
    in_data = in_data.reshape(in_shape)
    in_tens = tf.convert_to_tensor(in_data, dtype=tf.float32)

    # Rotate input.
    pivot = np.array(in_data.shape[1:3]) / 2
    rvecs = self.get_se2(self.n_rotations, pivot)
    in_tens = tf.repeat(in_tens, repeats=self.n_rotations, axis=0)
    in_tens = tfa_image.transform(in_tens, rvecs, interpolation='NEAREST')

    # Forward pass.
    in_tens = tf.split(in_tens, self.n_rotations)
    logits = ()
    for x in in_tens:
      logits += (self.model(x),)
    logits = tf.concat(logits, axis=0)

    # Rotate back output.
    rvecs = self.get_se2(self.n_rotations, pivot, reverse=True)
    logits = tfa_image.transform(logits, rvecs, interpolation='NEAREST')
    c0 = self.padding[:2, 0]
    c1 = c0 + in_img.shape[:2]
    logits = logits[:, c0[0]:c1[0], c0[1]:c1[1], :]

    logits = tf.transpose(logits, [3, 1, 2, 0])
    output = tf.reshape(logits, (1, np.prod(logits.shape)))
    if softmax:
      output = tf.nn.softmax(output)
      output = np.float32(output).reshape(logits.shape[1:])
    return output
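A hedged usage sketch for the softmax branch above: assuming the model's output has a single channel, the returned volume has shape (H, W, n_rotations) after the final reshape, so a pick point and rotation bin can be read off with a single argmax (`attention` as an instance of this class is an assumption):

import numpy as np

heatmap = attention.forward(in_img, softmax=True)  # (H, W, n_rotations)
i, j, r = np.unravel_index(np.argmax(heatmap), heatmap.shape)
theta = r * 2 * np.pi / heatmap.shape[2]  # rotation bin -> radians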
Example #3
  def forward(self, in_img, p, softmax=True):
    """Forward pass."""
    img_unprocessed = np.pad(in_img, self.padding, mode='constant')
    input_data = self.preprocess(img_unprocessed.copy())
    in_shape = (1,) + input_data.shape
    input_data = input_data.reshape(in_shape)
    in_tensor = tf.convert_to_tensor(input_data, dtype=tf.float32)

    # Rotate crop.
    pivot = np.array([p[1], p[0]]) + self.pad_size
    rvecs = self.get_se2(self.n_rotations, pivot)

    # Crop before network (default for Transporters in CoRL submission).
    crop = tf.convert_to_tensor(input_data.copy(), dtype=tf.float32)
    crop = tf.repeat(crop, repeats=self.n_rotations, axis=0)
    crop = tfa_image.transform(crop, rvecs, interpolation='NEAREST')
    crop = crop[:, p[0]:(p[0] + self.crop_size),
                p[1]:(p[1] + self.crop_size), :]
    logits, kernel_raw = self.model([in_tensor, crop])

    # Crop after network (for receptive field, and more elegant).
    # logits, crop = self.model([in_tensor, in_tensor])
    # # crop = tf.identity(kernel_bef_crop)
    # crop = tf.repeat(crop, repeats=self.n_rotations, axis=0)
    # crop = tfa_image.transform(crop, rvecs, interpolation='NEAREST')
    # kernel_raw = crop[:, p[0]:(p[0] + self.crop_size),
    #                   p[1]:(p[1] + self.crop_size), :]

    # Obtain kernels for cross-convolution.
    kernel_paddings = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
    kernel = tf.pad(kernel_raw, kernel_paddings, mode='CONSTANT')
    kernel = tf.transpose(kernel, [1, 2, 3, 0])

    return self.correlate(logits, kernel, softmax)
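`correlate` is not included in this snippet. A plausible sketch, mirroring the explicit cross-convolution in Example #13 below (the function name and the softmax handling here are assumptions):

import numpy as np
import tensorflow as tf

def correlate_sketch(in0, in1, softmax):
    """in0: (1, H, W, C) query logits; in1: (kh, kw, C, n_rotations) kernels."""
    output = tf.nn.convolution(in0, in1, data_format='NHWC')
    if softmax:
        output_shape = output.shape
        output = tf.reshape(output, (1, np.prod(output.shape)))
        output = tf.nn.softmax(output)
        output = np.float32(output).reshape(output_shape[1:])
    return output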
Example #4
  def test_perspective_transform_integer_centers_preset(self, dtype,
                                                        interpolation):
    """Tests that we can reproduce the results of tfa_image.transform."""
    image = tf.constant(
        ((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0), (10.0, 11.0, 12.0)),
        dtype=dtype)
    scale = 3
    transformation = tf.constant(
        ((1.0 / scale, 0.0, 0.0), (0.0, 1.0 / scale, 0.0), (0.0, 0.0, 1.0)),
        dtype=dtype)

    image_shape = tf.shape(input=image)
    image_resized_shape = image_shape * scale
    image = image[tf.newaxis, ..., tf.newaxis]
    transformation = transformation[tf.newaxis, ...]
    image_resized = tfa_image.transform(
        tf.cast(image, tf.float32),
        tf.cast(
            tfa_image.transform_ops.matrices_to_flat_transforms(transformation),
            tf.float32),
        interpolation=interpolation,
        output_shape=image_resized_shape)
    image_transformed = transformer.perspective_transform(
        image,
        transformation,
        resampling_type=transformer.ResamplingType.NEAREST
        if interpolation == "NEAREST" else transformer.ResamplingType.BILINEAR,
        pixel_type=transformer.PixelType.INTEGER,
        output_shape=image_resized_shape)

    self.assertAllClose(image_resized, image_transformed)
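For reference, `matrices_to_flat_transforms` normalizes each 3x3 output-to-input matrix by its bottom-right entry and keeps the first eight values. The test's scaling matrix therefore flattens as sketched below (a sketch of the convention, not of the op's internals):

import tensorflow as tf

matrix = tf.constant([[1.0 / 3.0, 0.0, 0.0],
                      [0.0, 1.0 / 3.0, 0.0],
                      [0.0, 0.0, 1.0]])
flat = tf.reshape(matrix / matrix[2, 2], [-1])[:8]
# -> [1/3, 0, 0, 0, 1/3, 0, 0, 0], the form tfa_image.transform consumes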
Example #5
def shear_y(image, level, fill_value):
    """Equivalent of PIL Shearing in Y dimension."""
    # Shear parallel to y axis is a projective transform
    # with a matrix form of:
    # [1  0
    #  level  1].
    image = tfi.transform(wrap(image), [1., 0., 0., level, 1., 0., 0., 0.])
    return unwrap(image, fill_value)
Example #6
def shear_y(image, level, replace):
    """Equivalent of PIL Shearing in Y dimension."""
    # Shear parallel to y axis is a projective transform
    # with a matrix form of:
    # [1  0
    #  level  1].
    image = contrib_image.transform(wrap(image),
                                    [1.0, 0.0, 0.0, level, 1.0, 0.0, 0.0, 0.0])
    return unwrap(image, replace)
Example #7
def shear_x(image, level, replace):
    """Equivalent of PIL Shearing in X dimension."""
    # Shear parallel to x axis is a projective transform
    # with a matrix form of:
    # [1  level
    #  0  1].
    image = contrib_image.transform(wrap(image),
                                    [1., level, 0., 0., 1., 0., 0., 0.])
    return unwrap(image, replace)
Example #8
def shear_y(image, level, replace):
    """Equivalent of PIL Shearing in Y dimension."""
    # Shear parallel to y axis is a projective transform
    # with a matrix form of:
    # [1  0
    #  level  1].
    with tf.name_scope("shear_y"):
        image = image_ops.transform(
            images=wrap(image), transforms=[1., 0., 0., level, 1., 0., 0., 0.])
        return unwrap(image, replace)
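Examples #5 through #8 all depend on wrap/unwrap helpers that are not shown. A minimal sketch of the usual AutoAugment-style pair, assuming a rank-3 RGB image and a length-3 `replace` pixel tensor of the same dtype:

import tensorflow as tf

def wrap(image):
    """Appends an all-ones channel so out-of-bounds pixels can be detected."""
    shape = tf.shape(image)
    ones = tf.ones([shape[0], shape[1], 1], image.dtype)
    return tf.concat([image, ones], axis=2)

def unwrap(image, replace):
    """Replaces pixels whose ones-channel was zeroed out by the transform."""
    image_shape = tf.shape(image)
    flattened = tf.reshape(image, [-1, image_shape[2]])
    alpha = flattened[:, 3:]
    replace = tf.concat([replace, tf.ones([1], image.dtype)], axis=0)
    flattened = tf.where(
        tf.equal(alpha, 0),
        tf.ones_like(flattened, dtype=image.dtype) * replace,
        flattened)
    image = tf.reshape(flattened, image_shape)
    return image[:, :, :3]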
Example #9
  def _transform_fn(data):
    """Applies a random transform to the pixels."""
    # EMNIST background is 1.0 but tfa_image.transform assumes 0.0, so invert.
    pixels = 1.0 - data['pixels']

    pixels = tfa_image.transform(pixels, transform, 'BILINEAR')

    # num_bits=9 actually yields 256 unique values.
    pixels = tf.quantization.quantize_and_dequantize(
        pixels, 0.0, 1.0, num_bits=9, range_given=True)

    data['pixels'] = 1.0 - pixels
    return data
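The `transform` variable is closed over from the enclosing scope. One plausible construction, assuming a small random rotation per client (the angle range and the 28x28 EMNIST size are assumptions):

import tensorflow as tf
import tensorflow_addons.image as tfa_image

angle = tf.random.uniform([], minval=-0.3, maxval=0.3)  # radians, assumed range
transform = tfa_image.angles_to_projective_transforms(
    angle, image_height=28, image_width=28)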
Example #10
def apply_affine_transform(image,
                           rotation=0.,
                           shift_x=0.,
                           shift_y=0.,
                           shear_x=0.,
                           shear_y=0.,
                           scale_x=1.,
                           scale_y=1.,
                           interpolation='bilinear'):
    """Applies affine transform(s) on the input images.

    The rotation, shear, and scaling transforms are applied relative to the image
    center, instead of (0, 0). The transform parameters can either be scalars
    (applied to all images in the batch) or [B]-tensors (applied to each image
    individually).

    Args:
      image: Input images in [B, H, W, C] format.
      rotation: Rotation angle in radians. Positive value rotates the image
        counter-clockwise.
      shift_x: Translation in x direction, in pixels.
      shift_y: Translation in y direction, in pixels.
      shear_x: Shear angle (radians) in x direction.
      shear_y: Shear angle (radians) in y direction.
      scale_x: Scaling factor in x direction.
      scale_y: Scaling factor in y direction.
      interpolation: Interpolation mode. Supported values: 'nearest', 'bilinear'.

    Returns:
      The transformed images in [B, H, W, C] format.
    """
    height, width = image.shape[1:3]

    rotation = tfa_image.angles_to_projective_transforms(
        rotation, height, width)
    shear = shears_to_projective_transforms([shear_x, shear_y], height, width)
    scaling = scales_to_projective_transforms([scale_x, scale_y], height,
                                              width)
    translation = tfa_image.translations_to_projective_transforms(
        [shift_x, shift_y])

    t = tfa_image.compose_transforms([rotation, shear, scaling, translation])
    transformed = tfa_image.transform(image, t, interpolation=interpolation)

    return transformed
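`shears_to_projective_transforms` and `scales_to_projective_transforms` are not part of tfa_image and their implementations are not shown. A plausible sketch of centered versions, with the sign conventions treated as assumptions:

import tensorflow as tf

def shears_to_projective_transforms(shears, height, width):
    """Flat transform for shear angles (ax, ay) about the image center."""
    ax, ay = shears
    cx, cy = width / 2.0, height / 2.0
    tx, ty = tf.tan(ax), tf.tan(ay)
    row = tf.stack([1.0, tx, -tx * cy, ty, 1.0, -ty * cx, 0.0, 0.0])
    return row[tf.newaxis, :]

def scales_to_projective_transforms(scales, height, width):
    """Flat transform for scale factors (sx, sy) about the image center."""
    sx, sy = scales
    cx, cy = width / 2.0, height / 2.0
    row = tf.stack([1.0 / sx, 0.0, cx * (1.0 - 1.0 / sx),
                    0.0, 1.0 / sy, cy * (1.0 - 1.0 / sy), 0.0, 0.0])
    return row[tf.newaxis, :]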
Example #11
  def _transform_fn(data):
    """Applies a random transform to the pixels."""
    # EMNIST background is 1.0 but tfa_image.transform assumes 0.0, so invert.
    pixels = 1.0 - data['pixels']

    pixels = tfa_image.transform(pixels, transform, 'BILINEAR')

    # num_bits=9 actually yields 256 unique values.
    pixels = tf.quantization.quantize_and_dequantize(
        pixels, 0.0, 1.0, num_bits=9, range_given=True)

    pixels = 1.0 - pixels

    result = data.copy()

    # The first pseudoclient applies the identity transformation.
    result['pixels'] = tf.cond(
        tf.equal(index, 0), lambda: data['pixels'], lambda: pixels)

    return result
Example #12
    def _heavy_data_augmentation_fn(self, example):
        """Perform heavy augmentation on a given input data example.

        This is the same data augmentation as the one done by Ben Graham, the
        winner of the 2015 Kaggle competition. See:
        https://github.com/btgraham/SparseConvNet/blob/a6bdb0c938b3556c1e6c23d5a014db9f404502b9/kaggleDiabetes1.cpp#L12

        Args:
          example: A dictionary containing an "image" key with the image to
            augment.

        Returns:
          The input dictionary with the key "image" containing the augmented
          image.
        """
        image = example["image"]
        image_shape = tf.shape(image)
        if len(image.get_shape().as_list()) not in [2, 3]:
            raise ValueError(
                "Input image must be a rank-2 or rank-3 tensor, but rank-{} "
                "was given".format(len(image.get_shape().as_list())))
        height = tf.cast(image_shape[0], dtype=tf.float32)
        width = tf.cast(image_shape[1], dtype=tf.float32)
        # Sample data augmentation parameters.
        s, a, b, hf, vf, dx, dy = (
            self._sample_heavy_data_augmentation_parameters())
        # Rotation + scale.
        c00 = (1 + s) * tf.cos(a)
        c01 = (1 + s) * tf.sin(a)
        c10 = (s - 1) * tf.sin(b)
        c11 = (1 - s) * tf.cos(b)
        # Horizontal and vertical flipping.
        c00 = c00 * hf
        c01 = c01 * hf
        c10 = c10 * vf
        c11 = c11 * vf
        # Convert x,y translation to absolute values.
        dx = width * dx
        dy = height * dy
        # Convert affine matrix to TF's transform. Matrix is applied w.r.t. the
        # center of the image.
        cy = height / 2.0
        cx = width / 2.0
        affine_matrix = [[c00, c01, (1.0 - c00) * cx - c01 * cy + dx],
                         [c10, c11, (1.0 - c11) * cy - c10 * cx + dy],
                         [0.0, 0.0, 1.0]]
        affine_matrix = tf.convert_to_tensor(affine_matrix, dtype=tf.float32)
        transform = tfa_image.transform_ops.matrices_to_flat_transforms(
            tf.linalg.inv(affine_matrix))
        if self._config in self._CONFIGS_WITH_GREY_BACKGROUND:
            # Since background is grey in these configs, put in pixels in [-1, 1]
            # range to avoid artifacts from the affine transformation.
            image = tf.cast(image, dtype=tf.float32)
            image = (image / 127.5) - 1.0
        # Apply the affine transformation.
        image = tfa_image.transform(images=image, transforms=transform)
        if self._config in self._CONFIGS_WITH_GREY_BACKGROUND:
            # Put pixels back to [0, 255] range and cast to uint8, since this is what
            # our preprocessing pipeline usually expects.
            image = (1.0 + image) * 127.5
            image = tf.cast(image, dtype=tf.uint8)
        example["image"] = image
        return example
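A hedged sketch of `_sample_heavy_data_augmentation_parameters`, matching the seven values unpacked above; the sampling distributions and ranges here are assumptions, not the ones from the Kaggle pipeline:

import numpy as np
import tensorflow as tf

def _sample_heavy_data_augmentation_parameters():
    s = tf.random.uniform([], -0.1, 0.1)            # scale jitter
    a = tf.random.uniform([], 0.0, 2.0 * np.pi)     # rotation angle (radians)
    b = tf.random.uniform([], -0.2, 0.2)            # skew angle (radians)
    hf = tf.sign(tf.random.uniform([], -1.0, 1.0))  # horizontal flip: +1 or -1
    vf = tf.sign(tf.random.uniform([], -1.0, 1.0))  # vertical flip: +1 or -1
    dx = tf.random.uniform([], -0.1, 0.1)           # x shift, fraction of width
    dy = tf.random.uniform([], -0.1, 0.1)           # y shift, fraction of height
    return s, a, b, hf, vf, dx, dy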
Example #13
    def forward(self, in_img, goal_img, p, apply_softmax=True):  # pylint: disable=g-doc-args
        """Forward pass of goal-conditioned Transporters.

        Runs input through all three networks, to get output of the same
        shape, except the last channel is 3 (output_dim). Then, the output
        of one stream provides the convolutional kernels for another. Calling
        tf.nn.convolution keeps the operation differentiable, so that
        gradients apply to all the FCNs.

        Cropping after passing the input image to the query network is
        easier, because otherwise we would need to do a forward pass, then
        call tf.multiply, then do a second forward pass after that.

        Returns:
          output tensor
        """
        assert in_img.shape == goal_img.shape, f'{in_img.shape}, {goal_img.shape}'

        # input image --> TF tensor, shape (384,224,6) --> (1,384,224,6)
        input_unproc = np.pad(in_img, self.padding, mode='constant')
        input_data = self.preprocess(input_unproc.copy())
        input_shape = (1, ) + input_data.shape
        input_data = input_data.reshape(input_shape)
        in_tensor = tf.convert_to_tensor(input_data, dtype=tf.float32)

        # goal image --> TF tensor, shape (384,224,6) --> (1,384,224,6)
        goal_unproc = np.pad(goal_img, self.padding, mode='constant')
        goal_data = self.preprocess(goal_unproc.copy())
        goal_shape = (1, ) + goal_data.shape
        goal_data = goal_data.reshape(goal_shape)
        goal_tensor = tf.convert_to_tensor(goal_data, dtype=tf.float32)

        # Get SE2 rotation vectors for cropping.
        pivot = np.array([p[1], p[0]]) + self.pad_size
        rvecs = self.get_se2(self.num_rotations, pivot)

        # Forward pass through three separate FCNs. All logits: (1,384,224,3).
        in_logits, kernel_nocrop_logits, goal_logits = self.model(
            [in_tensor, in_tensor, goal_tensor])

        # Use features from goal logits and combine with input and kernel.
        goal_x_in_logits = tf.multiply(goal_logits, in_logits)
        goal_x_kernel_logits = tf.multiply(goal_logits, kernel_nocrop_logits)

        # Crop the kernel_logits about the picking point and get rotations.
        crop = tf.identity(goal_x_kernel_logits)  # (1,384,224,3)
        crop = tf.repeat(crop, repeats=self.num_rotations,
                         axis=0)  # (24,384,224,3)
        crop = tfa_image.transform(crop, rvecs, interpolation='NEAREST')
        kernel = crop[:, p[0]:(p[0] + self.crop_size),
                      p[1]:(p[1] + self.crop_size), :]
        assert kernel.shape == (self.num_rotations, self.crop_size,
                                self.crop_size, self.odim)

        # Cross-convolve `in_x_goal_logits`. Padding kernel: (24,64,64,3) -->
        # (65,65,3,24).
        kernel_paddings = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
        kernel = tf.pad(kernel, kernel_paddings, mode='CONSTANT')
        kernel = tf.transpose(kernel, [1, 2, 3, 0])
        output = tf.nn.convolution(goal_x_in_logits,
                                   kernel,
                                   data_format='NHWC')
        output = (1 / (self.crop_size**2)) * output

        if apply_softmax:
            output_shape = output.shape
            output = tf.reshape(output, (1, np.prod(output.shape)))
            output = tf.nn.softmax(output)
            output = np.float32(output).reshape(output_shape[1:])

        # Daniel: visualize crops and kernels, for Transporter-Goal figure.
        # self.visualize_images(p, in_img, input_data, crop)
        # self.visualize_transport(p, in_img, input_data, crop, kernel)
        # self.visualize_logits(in_logits,            name='input')
        # self.visualize_logits(goal_logits,          name='goal')
        # self.visualize_logits(kernel_nocrop_logits, name='kernel')
        # self.visualize_logits(goal_x_in_logits,     name='goal_x_in')
        # self.visualize_logits(goal_x_kernel_logits, name='goal_x_kernel')

        return output
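A hedged usage sketch for the softmax branch above, assuming the returned volume has shape (H, W, num_rotations) and that `transport_goal` names an instance of this class:

import numpy as np

place_conf = transport_goal.forward(in_img, goal_img, p, apply_softmax=True)
i, j, r = np.unravel_index(np.argmax(place_conf), place_conf.shape)
theta = r * 2 * np.pi / place_conf.shape[2]  # rotation bin -> radians
place_pose = (i, j, theta)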