def forward(self, input_image):
    """Run the model once per rotation of the padded input image.

    Args:
        input_image: Array-like image. It is padded with `self.padding`,
            passed through `self.preprocess`, and given a leading batch
            axis of size one before being rotated.

    Returns:
        The concatenated model outputs, rotated back and cropped so that
        axes 1 and 2 span the first two dimensions of `input_image`.
    """
    # Pad, preprocess and prepend a batch axis.
    prepared = self.preprocess(
        np.pad(input_image, self.padding, mode='constant'))
    prepared = prepared.reshape((1,) + prepared.shape)
    images = tf.convert_to_tensor(prepared, dtype=tf.float32)

    # Build one copy per rotation, each turned about the image centre.
    center = np.array(prepared.shape[1:3]) / 2
    to_rotated = self.get_se2(self.num_rotations, center)
    images = tf.repeat(images, repeats=self.num_rotations, axis=0)
    images = tfa_image.transform(images, to_rotated, interpolation='NEAREST')

    # Run every rotated copy through the model on its own, then re-stack.
    per_rotation = tuple(
        self.model(single)
        for single in tf.split(images, self.num_rotations))
    logits = tf.concat(per_rotation, axis=0)

    # Undo the rotations on the model output.
    to_upright = self.get_se2(self.num_rotations, center, reverse=True)
    logits = tfa_image.transform(logits, to_upright, interpolation='NEAREST')

    # Strip the padding that was added at the start.
    start = self.padding[:2, 0]
    stop = start + input_image.shape[:2]
    rows = slice(start[0], stop[0])
    cols = slice(start[1], stop[1])
    return logits[:, rows, cols, :]
def forward(self, in_img, softmax=True):
    """Run the model once per rotation and optionally softmax the result.

    Args:
        in_img: Array-like image. It is padded with `self.padding` and
            passed through `self.preprocess` before use.
        softmax: When true, a softmax is taken over all output values
            jointly and a float32 NumPy array is returned.

    Returns:
        If `softmax` is false, the flattened logits as a tensor of shape
        `(1, N)`. Otherwise a float32 NumPy array shaped like the cropped
        logits after transposition, minus their leading axis.
    """
    # Pad, preprocess and prepend a batch axis.
    prepared = self.preprocess(np.pad(in_img, self.padding, mode='constant'))
    prepared = prepared.reshape((1,) + prepared.shape)
    images = tf.convert_to_tensor(prepared, dtype=tf.float32)

    # Build one copy per rotation, each turned about the image centre.
    center = np.array(prepared.shape[1:3]) / 2
    to_rotated = self.get_se2(self.n_rotations, center)
    images = tf.repeat(images, repeats=self.n_rotations, axis=0)
    images = tfa_image.transform(images, to_rotated, interpolation='NEAREST')

    # Run every rotated copy through the model on its own, then re-stack.
    per_rotation = tuple(
        self.model(single) for single in tf.split(images, self.n_rotations))
    logits = tf.concat(per_rotation, axis=0)

    # Undo the rotations on the model output.
    to_upright = self.get_se2(self.n_rotations, center, reverse=True)
    logits = tfa_image.transform(logits, to_upright, interpolation='NEAREST')

    # Strip the padding, then move the rotation axis last.
    start = self.padding[:2, 0]
    stop = start + in_img.shape[:2]
    logits = logits[:, start[0]:stop[0], start[1]:stop[1], :]
    logits = tf.transpose(logits, [3, 1, 2, 0])

    flat = tf.reshape(logits, (1, np.prod(logits.shape)))
    if not softmax:
        return flat

    # Normalise over every position and rotation at once.
    probabilities = tf.nn.softmax(flat)
    return np.float32(probabilities).reshape(logits.shape[1:])
def forward(self, in_img, p, softmax=True):
    """Correlate the scene features with rotated crops taken around `p`.

    Args:
        in_img: Array-like image. It is padded with `self.padding` and
            passed through `self.preprocess` before use.
        p: Two-element index; `p[0]` and `p[1]` are the starting offsets
            of the crop along axes 1 and 2 of the padded input.
        softmax: Forwarded unchanged to `self.correlate`.

    Returns:
        Whatever `self.correlate(logits, kernel, softmax)` returns.
    """
    padded = np.pad(in_img, self.padding, mode='constant')
    prepared = self.preprocess(padded.copy())
    prepared = prepared.reshape((1,) + prepared.shape)
    in_tensor = tf.convert_to_tensor(prepared, dtype=tf.float32)

    # Rotations pivot on the pick location, shifted by the pad size.
    pivot = np.array([p[1], p[0]]) + self.pad_size
    rvecs = self.get_se2(self.n_rotations, pivot)

    # Crop *before* the network: rotate copies of the input, then cut a
    # crop_size x crop_size window starting at p from each of them.
    # NOTE: the original also sketched (commented out) a crop-after-network
    # variant that feeds in_tensor to both model inputs and crops the
    # second output instead.
    rotated = tf.convert_to_tensor(prepared.copy(), dtype=tf.float32)
    rotated = tf.repeat(rotated, repeats=self.n_rotations, axis=0)
    rotated = tfa_image.transform(rotated, rvecs, interpolation='NEAREST')
    rows = slice(p[0], p[0] + self.crop_size)
    cols = slice(p[1], p[1] + self.crop_size)
    crop = rotated[:, rows, cols, :]

    logits, kernel_raw = self.model([in_tensor, crop])

    # Grow each kernel by one row and one column, then move the rotation
    # axis last so the kernels can be used for cross-convolution.
    pad_spec = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
    kernel = tf.transpose(
        tf.pad(kernel_raw, pad_spec, mode='CONSTANT'), [1, 2, 3, 0])

    return self.correlate(logits, kernel, softmax)
def test_perspective_transform_integer_centers_preset(self, dtype,
                                                      interpolation):
    """Checks perspective_transform against tfa_image.transform.

    A 4x3 image is upscaled by a factor of three with both
    implementations and the two results are compared.
    """
    scale = 3
    inv_scale = 1.0 / scale

    pixels = tf.constant(
        ((1.0, 2.0, 3.0),
         (4.0, 5.0, 6.0),
         (7.0, 8.0, 9.0),
         (10.0, 11.0, 12.0)),
        dtype=dtype)
    matrix = tf.constant(
        ((inv_scale, 0.0, 0.0),
         (0.0, inv_scale, 0.0),
         (0.0, 0.0, 1.0)),
        dtype=dtype)

    # The target shape is derived from the 2-D image, before axes are added.
    resized_shape = tf.shape(input=pixels) * scale

    # Add a leading batch axis to both, and a trailing axis to the image.
    batched_image = pixels[tf.newaxis, ..., tf.newaxis]
    batched_matrix = matrix[tf.newaxis, ...]

    # Reference result from tfa_image.transform.
    flat_transforms = tfa_image.transform_ops.matrices_to_flat_transforms(
        batched_matrix)
    expected = tfa_image.transform(
        tf.cast(batched_image, tf.float32),
        tf.cast(flat_transforms, tf.float32),
        interpolation=interpolation,
        output_shape=resized_shape)

    # Result under test.
    if interpolation == "NEAREST":
        resampling = transformer.ResamplingType.NEAREST
    else:
        resampling = transformer.ResamplingType.BILINEAR
    actual = transformer.perspective_transform(
        batched_image,
        batched_matrix,
        resampling_type=resampling,
        pixel_type=transformer.PixelType.INTEGER,
        output_shape=resized_shape)

    self.assertAllClose(expected, actual)
def shear_y(image, level, fill_value):
    """Shear `image` along the Y dimension, mirroring PIL's shearing.

    Args:
        image: Image handed to `wrap` before the transform is applied.
        level: Shear amount; the off-diagonal entry of the shear matrix.
        fill_value: Forwarded to `unwrap` together with the sheared image.

    Returns:
        The result of `unwrap` on the sheared image.
    """
    # A shear parallel to the y axis is the projective transform
    #   [1      0
    #    level  1],
    # written here as the flat eight-element vector passed to transform.
    shear_vector = [1., 0., 0., level, 1., 0., 0., 0.]
    sheared = tfi.transform(wrap(image), shear_vector)
    return unwrap(sheared, fill_value)
def shear_y(image, level, replace):
    """Shear `image` along the Y dimension, mirroring PIL's shearing.

    Args:
        image: Image handed to `wrap` before the transform is applied.
        level: Shear amount; the off-diagonal entry of the shear matrix.
        replace: Forwarded to `unwrap` together with the sheared image.

    Returns:
        The result of `unwrap` on the sheared image.
    """
    # A shear parallel to the y axis is the projective transform
    #   [1      0
    #    level  1],
    # written here as the flat eight-element vector passed to transform.
    flat_shear = [1.0, 0.0, 0.0, level, 1.0, 0.0, 0.0, 0.0]
    wrapped = wrap(image)
    sheared = contrib_image.transform(wrapped, flat_shear)
    return unwrap(sheared, replace)
def shear_x(image, level, replace):
    """Shear `image` along the X dimension, mirroring PIL's shearing.

    Args:
        image: Image handed to `wrap` before the transform is applied.
        level: Shear amount; the off-diagonal entry of the shear matrix.
        replace: Forwarded to `unwrap` together with the sheared image.

    Returns:
        The result of `unwrap` on the sheared image.
    """
    # A shear parallel to the x axis is the projective transform
    #   [1  level
    #    0  1],
    # written here as the flat eight-element vector passed to transform.
    flat_shear = [1., level, 0., 0., 1., 0., 0., 0.]
    wrapped = wrap(image)
    sheared = contrib_image.transform(wrapped, flat_shear)
    return unwrap(sheared, replace)
def shear_y(image, level, replace):
    """Shear `image` along the Y dimension, mirroring PIL's shearing.

    Args:
        image: Image handed to `wrap` before the transform is applied.
        level: Shear amount; the off-diagonal entry of the shear matrix.
        replace: Forwarded to `unwrap` together with the sheared image.

    Returns:
        The result of `unwrap` on the sheared image.
    """
    # A shear parallel to the y axis is the projective transform
    #   [1      0
    #    level  1],
    # written here as the flat eight-element vector passed to transform.
    flat_shear = [1., 0., 0., level, 1., 0., 0., 0.]
    with tf.name_scope("shear_y"):
        sheared = image_ops.transform(
            images=wrap(image), transforms=flat_shear)
        return unwrap(sheared, replace)
def _transform_fn(data):
    """Applies a random transform to the pixels.

    The name `transform` is not defined in this block; it is presumably
    bound in an enclosing scope and holds the transform to apply.

    Args:
        data: Mapping with a 'pixels' entry. It is no longer modified in
            place; a shallow copy carrying the new pixels is returned.

    Returns:
        A shallow copy of `data` whose 'pixels' entry holds the transformed
        and re-quantized pixels.
    """
    # EMNIST background is 1.0 but tfa_image.transform assumes 0.0, so invert.
    inverted = 1.0 - data['pixels']
    warped = tfa_image.transform(inverted, transform, 'BILINEAR')
    # num_bits=9 actually yields 256 unique values.
    quantized = tf.quantization.quantize_and_dequantize(
        warped, 0.0, 1.0, num_bits=9, range_given=True)

    # Copy instead of assigning into `data`, so the caller's mapping keeps
    # its original pixels; `.copy()` also preserves the mapping type.
    result = data.copy()
    result['pixels'] = 1.0 - quantized  # Undo the inversion.
    return result
def apply_affine_transform(image,
                           rotation=0.,
                           shift_x=0.,
                           shift_y=0.,
                           shear_x=0.,
                           shear_y=0.,
                           scale_x=1.,
                           scale_y=1.,
                           interpolation='bilinear'):
    """Applies a composed affine transform to a batch of images.

    Rotation, shear and scaling are taken relative to the image center
    rather than (0, 0). Each transform parameter may be a scalar (shared by
    every image in the batch) or a [B]-tensor (one value per image).

    Args:
        image: Input images in [B, H, W, C] format.
        rotation: Rotation angle in radians. Positive values rotate the
            image counter-clockwise.
        shift_x: Translation in x direction, in pixels.
        shift_y: Translation in y direction, in pixels.
        shear_x: Shear angle (radians) in x direction.
        shear_y: Shear angle (radians) in y direction.
        scale_x: Scaling factor in x direction.
        scale_y: Scaling factor in y direction.
        interpolation: Interpolation mode. Supported values: 'nearest',
            'bilinear'.

    Returns:
        The transformed images in [B, H, W, C] format.
    """
    height, width = image.shape[1:3]

    # Build one projective transform per operation.
    rotate_t = tfa_image.angles_to_projective_transforms(
        rotation, height, width)
    shear_t = shears_to_projective_transforms(
        [shear_x, shear_y], height, width)
    scale_t = scales_to_projective_transforms(
        [scale_x, scale_y], height, width)
    shift_t = tfa_image.translations_to_projective_transforms(
        [shift_x, shift_y])

    # Compose them in the order rotation, shear, scaling, translation.
    combined = tfa_image.compose_transforms(
        [rotate_t, shear_t, scale_t, shift_t])
    return tfa_image.transform(image, combined, interpolation=interpolation)
def _transform_fn(data):
    """Applies a random transform to the pixels, except when index is 0.

    The names `transform` and `index` are not defined in this block; they
    are presumably bound in an enclosing scope.

    Args:
        data: Mapping with a 'pixels' entry. It is left unmodified.

    Returns:
        A shallow copy of `data` whose 'pixels' entry is the original
        pixels when `index` equals 0 and the transformed pixels otherwise.
    """
    untouched = data['pixels']

    # EMNIST background is 1.0 but tfa_image.transform assumes 0.0, so the
    # pixels are inverted going in and inverted again coming out.
    warped = tfa_image.transform(1.0 - untouched, transform, 'BILINEAR')
    # num_bits=9 actually yields 256 unique values.
    quantized = tf.quantization.quantize_and_dequantize(
        warped, 0.0, 1.0, num_bits=9, range_given=True)
    transformed = 1.0 - quantized

    # The first pseudoclient applies the identity transformation.
    is_first = tf.equal(index, 0)
    result = data.copy()
    result['pixels'] = tf.cond(
        is_first, lambda: untouched, lambda: transformed)
    return result
def _heavy_data_augmentation_fn(self, example):
    """Applies a heavy random affine augmentation to `example["image"]`.

    This is the same data augmentation as the one done by Ben Graham, the
    winner of the 2015 Kaggle competition. See:
    https://github.com/btgraham/SparseConvNet/blob/a6bdb0c938b3556c1e6c23d5a014db9f404502b9/kaggleDiabetes1.cpp#L12

    Args:
        example: A dictionary containing an "image" key with the image to
            augment. The dictionary is updated in place.

    Returns:
        The input dictionary with the key "image" containing the augmented
        image.

    Raises:
        ValueError: If the image's static rank is neither 2 nor 3.
    """
    image = example["image"]
    dynamic_shape = tf.shape(image)

    rank = len(image.get_shape().as_list())
    if rank not in [2, 3]:
        raise ValueError(
            "Input image must be a rank-2 or rank-3 tensor, but rank-{} "
            "was given".format(rank))

    height = tf.cast(dynamic_shape[0], dtype=tf.float32)
    width = tf.cast(dynamic_shape[1], dtype=tf.float32)

    # Sample data augmentation parameters.
    s, a, b, hf, vf, dx, dy = (
        self._sample_heavy_data_augmentation_parameters())

    # Rotation + scale, with horizontal (hf) and vertical (vf) flipping
    # folded into the first and second matrix rows respectively.
    m00 = (1 + s) * tf.cos(a) * hf
    m01 = (1 + s) * tf.sin(a) * hf
    m10 = (s - 1) * tf.sin(b) * vf
    m11 = (1 - s) * tf.cos(b) * vf

    # Convert the x, y translation to absolute values.
    shift_x = width * dx
    shift_y = height * dy

    # Build the affine matrix so that it acts about the image center, then
    # convert its inverse to the flat transform format.
    center_y = height / 2.0
    center_x = width / 2.0
    first_row = [m00, m01, (1.0 - m00) * center_x - m01 * center_y + shift_x]
    second_row = [m10, m11, (1.0 - m11) * center_y - m10 * center_x + shift_y]
    matrix = tf.convert_to_tensor(
        [first_row, second_row, [0.0, 0.0, 1.0]], dtype=tf.float32)
    flat_transform = tfa_image.transform_ops.matrices_to_flat_transforms(
        tf.linalg.inv(matrix))

    grey_background = self._config in self._CONFIGS_WITH_GREY_BACKGROUND

    if grey_background:
        # Since background is grey in these configs, put pixels in the
        # [-1, 1] range to avoid artifacts from the affine transformation.
        image = tf.cast(image, dtype=tf.float32)
        image = (image / 127.5) - 1.0

    # Apply the affine transformation.
    image = tfa_image.transform(images=image, transforms=flat_transform)

    if grey_background:
        # Put pixels back in the [0, 255] range and cast to uint8, since
        # this is what the preprocessing pipeline usually expects.
        image = (1.0 + image) * 127.5
        image = tf.cast(image, dtype=tf.uint8)

    example["image"] = image
    return example
def forward(self, in_img, goal_img, p, apply_softmax=True):  # pylint: disable=g-doc-args
    """Forward pass of goal-conditioned Transporters.

    Runs the inputs through all three networks to get outputs of the same
    shape, except that the last channel is 3 (output_dim). The output of
    one stream then supplies the convolutional kernels for another. Calling
    tf.nn.convolution keeps the operation differentiable, so gradients
    apply to all the FCNs.

    Cropping after passing the input image to the query network is easier,
    because otherwise we would need to do a forward pass, call tf.multiply,
    and then do a second forward pass after that.

    Args:
        in_img: Current observation; must have the same shape as
            `goal_img`.
        goal_img: Goal observation.
        p: Two-element index; `p[0]` and `p[1]` are the starting offsets of
            the kernel crop along axes 1 and 2.
        apply_softmax: When true, a softmax is taken over all output values
            jointly and a float32 NumPy array is returned.

    Returns:
        The cross-convolution output: a tensor when `apply_softmax` is
        false, otherwise a float32 NumPy array without the batch axis.
    """
    assert in_img.shape == goal_img.shape, f'{in_img.shape}, {goal_img.shape}'

    def as_batched_tensor(img):
        # Image --> padded, preprocessed float32 tensor with a batch axis,
        # e.g. (384,224,6) --> (1,384,224,6) per the original comments.
        padded = np.pad(img, self.padding, mode='constant')
        prepared = self.preprocess(padded.copy())
        prepared = prepared.reshape((1,) + prepared.shape)
        return tf.convert_to_tensor(prepared, dtype=tf.float32)

    in_tensor = as_batched_tensor(in_img)
    goal_tensor = as_batched_tensor(goal_img)

    # SE2 rotation vectors for cropping, pivoting on the pick point shifted
    # by the pad size.
    pivot = np.array([p[1], p[0]]) + self.pad_size
    rvecs = self.get_se2(self.num_rotations, pivot)

    # Forward pass through three separate FCNs.
    in_logits, kernel_nocrop_logits, goal_logits = self.model(
        [in_tensor, in_tensor, goal_tensor])

    # Gate both the input and the kernel features with the goal features.
    goal_x_in_logits = tf.multiply(goal_logits, in_logits)
    goal_x_kernel_logits = tf.multiply(goal_logits, kernel_nocrop_logits)

    # Rotate copies of the kernel features and crop each about the pick
    # point.
    rotated = tf.identity(goal_x_kernel_logits)
    rotated = tf.repeat(rotated, repeats=self.num_rotations, axis=0)
    rotated = tfa_image.transform(rotated, rvecs, interpolation='NEAREST')
    rows = slice(p[0], p[0] + self.crop_size)
    cols = slice(p[1], p[1] + self.crop_size)
    kernel = rotated[:, rows, cols, :]
    assert kernel.shape == (self.num_rotations, self.crop_size,
                            self.crop_size, self.odim)

    # Grow each kernel by one row and one column and move the rotation
    # axis last, e.g. (24,64,64,3) --> (65,65,3,24), then cross-convolve.
    pad_spec = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
    kernel = tf.pad(kernel, pad_spec, mode='CONSTANT')
    kernel = tf.transpose(kernel, [1, 2, 3, 0])
    output = tf.nn.convolution(goal_x_in_logits, kernel, data_format='NHWC')
    output = (1 / (self.crop_size**2)) * output

    if not apply_softmax:
        return output

    # Normalise over every position and rotation at once, then restore the
    # spatial layout without the batch axis.
    conv_shape = output.shape
    flat = tf.reshape(output, (1, np.prod(output.shape)))
    probabilities = tf.nn.softmax(flat)
    return np.float32(probabilities).reshape(conv_shape[1:])