def test_resize_sparse_flow(self):
  """Resizes a sparse 8x8 flow field with its mask to 2x2 and checks both."""
  size = 8

  # Sparse input: all entries are zero except for a single flow vector [1, 0]
  # at the top-left pixel, which is also the only pixel set in the mask.
  flow_values = [[[0, 0] for _ in range(size)] for _ in range(size)]
  flow_values[0][0] = [1, 0]
  mask_values = [[[0] for _ in range(size)] for _ in range(size)]
  mask_values[0][0] = [1]
  flow = tf.constant(flow_values, dtype=tf.float32)
  mask = tf.constant(mask_values, dtype=tf.float32)

  # Expected 2x2 outputs: the flow value is 0.25 (the 2 / 8 size ratio applied
  # to the original value of 1) and the mask stays 1 at the top-left pixel.
  expected_flow = tf.constant(
      [[[0.25, 0], [0, 0]], [[0, 0], [0, 0]]], dtype=tf.float32)
  expected_mask = tf.constant([[[1], [0]], [[0], [0]]], dtype=tf.float32)

  resized_flow, resized_mask = uflow_utils.resize(
      flow, 2, 2, is_flow=True, mask=mask)

  # Both outputs are compared element-wise for exact equality.
  flow_matches = tf.reduce_all(
      tf.math.equal(resized_flow, expected_flow)).numpy()
  mask_matches = tf.reduce_all(
      tf.math.equal(resized_mask, expected_mask)).numpy()
  self.assertTrue(flow_matches)
  self.assertTrue(mask_matches)
def random_scale(images, flow=None, mask=None, min_scale=1.0, max_scale=1.0):
  """Rescales images (and optionally flow and mask) by one random factor.

  Args:
    images: tensor whose dimensions -3 and -2 are read as height and width.
    flow: optional flow field; when given it is resized together with `mask`.
    mask: mask handed to the flow resize. It is only touched when `flow` is
      given (NOTE(review): the flow resize result is unpacked into
      (flow, mask), so a mask is presumably required then -- confirm).
    min_scale: lower bound of the uniformly sampled scale factor.
    max_scale: upper bound of the uniformly sampled scale factor.

  Returns:
    A tuple (images, flow, mask). `flow` and `mask` are returned unchanged
    when `flow` is None.
  """
  image_shape = tf.shape(images)
  factor = tf.random.uniform(
      [], minval=min_scale, maxval=max_scale, dtype=tf.float32)

  def _scaled(size):
    # Scaled sizes are rounded up to the next integer.
    return tf.cast(
        tf.math.ceil(tf.cast(size, tf.float32) * factor), tf.int32)

  target_height = _scaled(image_shape[-3])
  target_width = _scaled(image_shape[-2])

  images = uflow_utils.resize(
      images, target_height, target_width, is_flow=False)
  if flow is None:
    return images, flow, mask

  flow, mask = uflow_utils.resize(
      flow, target_height, target_width, is_flow=True, mask=mask)
  return images, flow, mask
def random_scale_second(images, flow=None, mask=None, min_scale=1.0,
                        max_scale=1.0):
  """Randomly rescales only the second image and adapts flow and mask.

  Args:
    images: stacked pair of images that is unstacked into (first, second);
      dimensions -3 and -2 of the stack are read as height and width.
    flow: optional flow field belonging to the first image.
    mask: mask that is multiplied onto the adapted flow; only used when
      `flow` is given.
    min_scale: lower bound of the uniformly sampled scale factor.
    max_scale: upper bound of the uniformly sampled scale factor.

  Returns:
    A tuple (images, flow, mask). When the sampled scale is below 1 the first
    image, the flow and the mask are center-cropped to the scaled size;
    otherwise the second image is center-cropped back to the original size.
  """
  src_height = tf.shape(images)[-3]
  src_width = tf.shape(images)[-2]
  scale = tf.random.uniform(
      [], minval=min_scale, maxval=max_scale, dtype=tf.float32)
  # The scaled resolution is rounded up to whole pixels.
  dst_height = tf.cast(
      tf.math.ceil(tf.cast(src_height, tf.float32) * scale), tf.int32)
  dst_width = tf.cast(
      tf.math.ceil(tf.cast(src_width, tf.float32) * scale), tf.int32)

  # Only the second image is resized.
  first, second = tf.unstack(images)
  second = uflow_utils.resize(second, dst_height, dst_width, is_flow=False)

  # Crop whichever image is larger so that both have the same resolution.
  if scale < 1.0:
    first = _center_crop(first, dst_height, dst_width)
  else:
    second = _center_crop(second, src_height, src_width)
  images = tf.stack([first, second])

  if flow is not None:
    # Pixel locations with the origin in the image center.
    positions = _positions_center_origin(src_height, src_width)

    # Scale factor of the actual (rounded) new resolution, as (h, w).
    scale_flow = (
        tf.cast(tf.stack([dst_height, dst_width]), tf.float32) /
        tf.cast(tf.stack([src_height, src_width]), tf.float32))

    # Scale the flow targets about the center, then convert back to offsets.
    # Multiplying by the mask zeroes the flow at invalid locations.
    targets = (positions + flow) * scale_flow
    flow = (targets - positions) * mask

    if scale < 1.0:
      # The reference image was cropped above, so flow and mask are cropped
      # in the same way to keep the shapes consistent.
      flow = _center_crop(flow, dst_height, dst_width)
      mask = _center_crop(mask, dst_height, dst_width)

  return images, flow, mask
def transform(images, i_or_ij, is_flow, crop_height, crop_width,
              shift_heights, shift_widths, resize):
  """Crops a shifted window out of `images` and optionally resizes it back.

  Args:
    images: 4-D tensor (indexed as [:, rows, cols, :]) holding images, masks
      or flow.
    i_or_ij: an int index i for images, or a pair (i, j) for flows and masks.
      Index i selects the shift applied to the crop window; j is only used
      for the flow offset.
    is_flow: bool, whether `images` holds flow. Requires i_or_ij to be a pair.
    crop_height: number of rows removed at the top and at the bottom.
    crop_width: number of columns removed at the left and at the right.
    shift_heights: indexable vertical shifts of the crop window.
    shift_widths: indexable horizontal shifts of the crop window.
    resize: bool, whether to resize the crop back to the input resolution.

  Returns:
    The cropped tensor, resized back to the input height and width if
    `resize` is set.
  """
  if isinstance(i_or_ij, int):
    # A single index is only valid for images; flow needs both i and j.
    assert not is_flow
    i = i_or_ij
  else:
    i, j = i_or_ij

  if is_flow:
    # The two frames are shifted differently, so the flow between them
    # changes by the difference of their shifts.
    shifts = tf.stack([shift_heights, shift_widths], axis=-1)
    images = images + tf.cast(shifts[i] - shifts[j], tf.float32)

  shift_height = shift_heights[i]
  shift_width = shift_widths[i]
  height = images.shape[-3]
  width = images.shape[-2]

  # Bounds of the shifted crop window.
  top = crop_height + shift_height
  bottom = height - crop_height + shift_height
  left = crop_width + shift_width
  right = width - crop_width + shift_width

  # Assert that the crop window stays inside the image frame and that the
  # image is large enough to be cropped at all.
  checks = [
      tf.compat.v1.assert_greater_equal(top, 0),
      tf.compat.v1.assert_greater_equal(left, 0),
      tf.compat.v1.assert_less_equal(bottom, height),
      tf.compat.v1.assert_less_equal(right, width),
      tf.compat.v1.assert_greater(
          height, 2 * crop_height,
          message='Image height is too small for cropping.'),
      tf.compat.v1.assert_greater(
          width, 2 * crop_width,
          message='Image width is too small for cropping.'),
  ]
  with tf.control_dependencies(checks):
    images = images[:, top:bottom, left:right, :]

  # Restore the static shape, which depends on whether we resize back.
  if resize:
    images = uflow_utils.resize(images, height, width, is_flow=is_flow)
    out_height, out_width = height, width
  else:
    out_height = height - 2 * crop_height
    out_width = width - 2 * crop_width
  images.set_shape((images.shape[0], out_height, out_width, images.shape[3]))
  return images
def parse_data(proto, height, width):
  """Decodes a sequence of PNG images from a sequence example and resizes it.

  Args:
    proto: Encoded data in proto / tf-sequence-example format.
    height: int, desired image height.
    width: int, desired image width.

  Returns:
    A sequence of images as tf.Tensor of shape
    [sequence length, height, width, 3].
  """
  context_spec = {
      'height': tf.io.FixedLenFeature([], tf.int64),
      'width': tf.io.FixedLenFeature([], tf.int64),
  }
  sequence_spec = {
      'images': tf.io.FixedLenSequenceFeature([], tf.string),
  }
  # The context (height / width) is parsed but not needed here, because the
  # PNG decoder determines the image size on its own.
  _, sequence_parsed = tf.io.parse_single_sequence_example(
      proto, context_features=context_spec, sequence_features=sequence_spec)

  def _png_to_float(png_bytes):
    """Decodes one PNG string into a float32 image."""
    return tf.image.convert_image_dtype(
        tf.image.decode_png(png_bytes), tf.float32)

  frames = tf.map_fn(
      _png_to_float, sequence_parsed['images'], dtype=tf.float32)

  # Bring all frames to the requested resolution.
  return uflow_utils.resize(frames, height, width, is_flow=False)
def parse_data(proto,
               include_flow,
               height=None,
               width=None,
               include_occlusion=False,
               include_invalid=False,
               resize_gt_flow=True,
               gt_flow_shape=None):
  """Parses a sequence example with images and optional flow and masks.

  Args:
    proto: encoded sequence example, handed to
      tf.io.parse_single_sequence_example.
    include_flow: bool, whether or not to include flow (and an all-ones valid
      mask) in the output.
    height: int or None, height to resize to. Resizing only happens if both
      height and width are given.
    width: int or None, width to resize to.
    include_occlusion: bool, whether or not to also return the occlusion mask
      (the 'occlusion_mask' feature must be present).
    include_invalid: bool, whether or not to also return the invalid masks
      (the 'invalid_masks' feature must be present).
    resize_gt_flow: bool, whether or not to resize the ground truth flow in
      the same way as the images.
    gt_flow_shape: list or None, static shape to set on the flow when it is
      not resized (e.g. to fix the ground truth flow shape for supervised
      training at full resolution with resize_gt_flow=False).

  Returns:
    The images tensor alone if nothing else was requested. Otherwise a list
    that starts with the images, followed by the requested entries in this
    order: flow, flow valid mask, occlusion mask, invalid masks.
  """
  do_resize = height is not None and width is not None

  # Assemble the feature specs depending on what was requested.
  context_features = {
      'height': tf.io.FixedLenFeature([], tf.int64),
      'width': tf.io.FixedLenFeature([], tf.int64),
  }
  sequence_features = {
      'images': tf.io.FixedLenSequenceFeature([], tf.string),
  }
  if include_invalid:
    sequence_features['invalid_masks'] = tf.io.FixedLenSequenceFeature(
        [], tf.string)
  if include_flow:
    context_features['flow_uv'] = tf.io.FixedLenFeature([], tf.string)
  if include_occlusion:
    context_features['occlusion_mask'] = tf.io.FixedLenFeature([], tf.string)

  context_parsed, sequence_parsed = tf.io.parse_single_sequence_example(
      proto,
      context_features=context_features,
      sequence_features=sequence_features,
  )

  def deserialize(raw, dtype, channels):
    """Decodes raw bytes into a [height, width, channels] tensor."""
    stored_shape = [
        context_parsed['height'], context_parsed['width'], channels
    ]
    return tf.reshape(tf.io.decode_raw(raw, dtype), stored_shape)

  images = tf.map_fn(
      lambda raw: deserialize(raw, tf.uint8, 3),
      sequence_parsed['images'],
      dtype=tf.uint8)
  images = tf.image.convert_image_dtype(images, tf.float32)
  if do_resize:
    images = uflow_utils.resize(images, height, width, is_flow=False)
  output = [images]

  if include_flow:
    flow_uv = deserialize(context_parsed['flow_uv'], tf.float32, 2)
    # To be consistent with uflow internals, we flip the ordering of flow.
    flow_uv = flow_uv[Ellipsis, ::-1]
    if do_resize and resize_gt_flow:
      flow_uv = uflow_utils.resize(flow_uv, height, width, is_flow=True)
    elif gt_flow_shape is not None:
      flow_uv.set_shape(gt_flow_shape)
    output.append(flow_uv)
    # Create the valid mask: every flow vector is marked as valid.
    output.append(tf.ones_like(flow_uv[Ellipsis, :1], dtype=tf.float32))

  if include_occlusion:
    occlusion_mask = deserialize(
        context_parsed['occlusion_mask'], tf.uint8, 1)
    if do_resize:
      occlusion_mask = uflow_utils.resize(
          occlusion_mask, height, width, is_flow=False)
    output.append(occlusion_mask)

  if include_invalid:
    invalid_masks = tf.map_fn(
        lambda raw: deserialize(raw, tf.uint8, 1),
        sequence_parsed['invalid_masks'],
        dtype=tf.uint8)
    if do_resize:
      invalid_masks = uflow_utils.resize(
          invalid_masks, height, width, is_flow=False)
    output.append(invalid_masks)

  # Only return a list if there is more than one item in the output.
  return output[0] if len(output) == 1 else output
def parse_supervised_train_data(proto, height, width, resize_gt_flow):
  """Parses a proto into images, flow and valid mask for supervised training.

  Args:
    proto: Encoded data in proto / tf-sequence-example format.
    height: int or None, desired image height.
    width: int or None, desired image width.
    resize_gt_flow: bool, whether or not to resize the flow according to the
      images.

  Returns:
    A tuple of tf.Tensors (images, flow_uv, flow_valid), where uv represents
    the flow field and valid is a mask for which entries are valid (this uses
    the occ version that includes all flow vectors). Images are resized to
    [height, width] if both are given. The flow and its mask are resized as
    well if resize_gt_flow is set; otherwise images, flow and mask are
    center-cropped to 370x1224 (before the image resize) and the flow and
    mask keep that resolution.
  """
  # Crop size that fits all KITTI 2015 image resolutions: the first 156
  # sequences have a resolution of 375x1242, while the remaining 44 sequences
  # include resolutions of 370x1224, 374x1238, and 376x1241.
  crop_height, crop_width = 370, 1224

  images, flow_uv_occ, _, flow_valid_occ, _ = parse_eval_data(proto)
  flow_valid_occ = tf.cast(flow_valid_occ, tf.float32)

  has_target_size = height is not None and width is not None
  resize_flow = resize_gt_flow and has_target_size

  if not resize_flow:
    _, orig_height, orig_width, _ = tf.unstack(tf.shape(images))
    offset_height = tf.cast((orig_height - crop_height) / 2, tf.int32)
    offset_width = tf.cast((orig_width - crop_width) / 2, tf.int32)

    def _center_crop(tensor):
      """Cuts the centered crop_height x crop_width window out of tensor."""
      return tf.image.crop_to_bounding_box(
          tensor,
          offset_height=offset_height,
          offset_width=offset_width,
          target_height=crop_height,
          target_width=crop_width)

    images = _center_crop(images)
    flow_uv_occ = _center_crop(flow_uv_occ)
    flow_valid_occ = _center_crop(flow_valid_occ)

  # Resize images.
  if has_target_size:
    images = uflow_utils.resize(images, height, width, is_flow=False)

  # The label order of the flow is swapped in both cases.
  swapped_flow = flow_uv_occ[Ellipsis, ::-1]
  if resize_flow:
    flow_uv, flow_valid = uflow_utils.resize(
        swapped_flow, height, width, is_flow=True, mask=flow_valid_occ)
  else:
    flow_uv = swapped_flow
    flow_valid = flow_valid_occ
    # Set the static shape of the crop to work with tf estimator.
    flow_uv.set_shape([crop_height, crop_width, 2])
    flow_valid.set_shape([crop_height, crop_width, 1])

  return images, flow_uv, flow_valid
def random_crop(images, flow=None, mask=None, crop_height=None,
                crop_width=None, relative_offset=0):
  """Randomly crops an image pair (and its flow and mask) to a given size.

  If the images are smaller than the crop, everything is upscaled first so
  that the crop fits. The second image may be cropped at a location that is
  shifted by up to `relative_offset` pixels relative to the first image; the
  flow is corrected for that shift.

  Args:
    images: stacked pair of images that is unstacked into (first, second);
      dimensions -3 and -2 of the stack are read as height and width.
    flow: optional flow field, cropped like the first image.
    mask: mask cropped like the flow and multiplied onto it; only used when
      `flow` is given.
    crop_height: height of the crop, or None to skip cropping.
    crop_width: width of the crop, or None to skip cropping.
    relative_offset: maximum shift (per axis) of the second crop relative to
      the first crop.

  Returns:
    A tuple (images, flow, mask); the inputs are returned unchanged if
    crop_height or crop_width is None.
  """
  # Nothing to do without a complete crop size.
  if crop_height is None or crop_width is None:
    return images, flow, mask

  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # Upscaling factor needed for the crop to fit (1 if it already fits).
  ratio_h = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
  ratio_w = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
  scale = tf.math.maximum(tf.math.maximum(1.0, ratio_h), ratio_w)

  # Minimum required resolution, rounded up to whole pixels.
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)

  # Perform the resize (scales with 1 if not required).
  images = uflow_utils.resize(images, new_height, new_width, is_flow=False)

  # Offset of the crop in the first image.
  max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
  max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
  joint_offset_h = tf.random.uniform(
      [], maxval=max_offset_h + 1, dtype=tf.int32)
  joint_offset_w = tf.random.uniform(
      [], maxval=max_offset_w + 1, dtype=tf.int32)

  # Offset of the crop in the second image: within relative_offset of the
  # first crop, clamped to the valid offset range.
  low_h = tf.math.maximum(joint_offset_h - relative_offset, 0)
  high_h = tf.math.minimum(joint_offset_h + relative_offset, max_offset_h)
  low_w = tf.math.maximum(joint_offset_w - relative_offset, 0)
  high_w = tf.math.minimum(joint_offset_w + relative_offset, max_offset_w)
  relative_offset_h = tf.random.uniform(
      [], minval=low_h, maxval=high_h + 1, dtype=tf.int32)
  relative_offset_w = tf.random.uniform(
      [], minval=low_w, maxval=high_w + 1, dtype=tf.int32)

  def _crop_at(tensor, offset_h, offset_w):
    """Cuts a crop_height x crop_width window at the given offset."""
    return tf.image.crop_to_bounding_box(
        tensor,
        offset_height=offset_h,
        offset_width=offset_w,
        target_height=crop_height,
        target_width=crop_width)

  # Crop both images, each at its own offset.
  first, second = tf.unstack(images)
  first = _crop_at(first, joint_offset_h, joint_offset_w)
  second = _crop_at(second, relative_offset_h, relative_offset_w)
  images = tf.stack([first, second])

  if flow is not None:
    # Perform the resize (scales with 1 if not required).
    flow, mask = uflow_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)

    # Flow and mask live in the frame of the first image.
    flow = _crop_at(flow, joint_offset_h, joint_offset_w)
    mask = _crop_at(mask, joint_offset_h, joint_offset_w)

    # Correct the flow for the relative shift between the two crops.
    flow_delta = tf.stack([
        tf.cast(relative_offset_h - joint_offset_h, tf.float32),
        tf.cast(relative_offset_w - joint_offset_w, tf.float32),
    ])
    flow = (flow - flow_delta) * mask

  return images, flow, mask
def batch_infer_no_tf_function(self,
                               images,
                               input_height=None,
                               input_width=None,
                               resize_flow_to_img_res=True,
                               infer_occlusion=False):
  """Infers flow from two images.

  Args:
    images: tf.tensor of shape [batchsize, 2, height, width, 3].
    input_height: height at which the model should be applied if different
      from image height.
    input_width: width at which the model should be applied if different
      from image width.
    resize_flow_to_img_res: bool, if True, return the flow resized to the
      same resolution as (image1, image2). If False, return flow at whatever
      resolution the model natively predicts it.
    infer_occlusion: bool, if True, return both flow and an occlusion mask
      (always resized to the image resolution), else return just flow.

  Returns:
    Optical flow for each pixel in image1 pointing to image2, or a tuple
    (flow, occlusion_mask) if infer_occlusion is True.
  """
  (batch_size, seq_len, orig_height, orig_width,
   image_channels) = images.shape.as_list()

  # Default to running the model at the image resolution.
  input_height = orig_height if input_height is None else input_height
  input_width = orig_width if input_width is None else input_width

  # Ensure a feasible computation resolution. If the specified size is not
  # feasible with the model, change it to a slightly higher resolution.
  divisible_by_num = pow(2.0, self._num_levels)

  def _round_up(size):
    """Rounds size up to the next multiple of divisible_by_num."""
    return int(math.ceil(float(size) / divisible_by_num) * divisible_by_num)

  height_fits = input_height % divisible_by_num == 0
  width_fits = input_width % divisible_by_num == 0
  if not (height_fits and width_fits):
    print('Cannot process images at a resolution of {}x{}, since the height '
          'and/or width is not a multiple of {}.'.format(
              input_height, input_width, divisible_by_num))
    input_height = _round_up(input_height)
    input_width = _round_up(input_width)
    print('Inference will be run at a resolution of {}x{}.'.format(
        input_height, input_width))

  # Resize images to the desired input height and width.
  if input_height != orig_height or input_width != orig_width:
    images = uflow_utils.resize(
        images, input_height, input_width, is_flow=False)

  # Fold the sequence length into the batch dimension, apply the feature
  # network and undo the folding afterwards.
  flat_shape = [batch_size * seq_len, input_height, input_width,
                image_channels]
  images_flattened = tf.reshape(images, flat_shape)
  # noinspection PyCallingNonCallable
  features_flattened = self._feature_model(
      images_flattened, split_features_by_sample=False)
  features = [
      tf.reshape(f, [batch_size, seq_len] + f.shape.as_list()[1:])
      for f in features_flattened
  ]
  features1 = [f[:, 0] for f in features]
  features2 = [f[:, 1] for f in features]

  # Compute flow in the frame of image1.
  # noinspection PyCallingNonCallable
  flow = self._flow_model(features1, features2, training=False)[0]

  occlusion_mask = None
  if infer_occlusion:
    # The occlusion estimate additionally uses the flow in the frame of
    # image2, and is computed before the forward flow is resized.
    # noinspection PyCallingNonCallable
    flow_backward = self._flow_model(features2, features1, training=False)[0]
    occlusion_mask = self.infer_occlusion(flow, flow_backward)
    occlusion_mask = uflow_utils.resize(
        occlusion_mask, orig_height, orig_width, is_flow=False)

  # Resize and rescale flow to the original resolution. This is needed
  # whenever full-resolution flow is requested, because flow is generated at
  # a lower resolution.
  if resize_flow_to_img_res:
    flow = uflow_utils.resize(flow, orig_height, orig_width, is_flow=True)

  if infer_occlusion:
    return flow, occlusion_mask
  return flow