def testDieOnTargetSizeGreaterThanImageSize(self):
  """pad_to_bounding_box must reject a target smaller than the image."""
  image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
  with self.test_session():
    placeholder = tf.placeholder(tf.float32)
    # Target width (1) is smaller than the actual width (2).
    padded = utils.pad_to_bounding_box(placeholder, 0, 0, 2, 1, 255)
    with self.assertRaisesWithPredicateMatch(
        errors.InvalidArgumentError, 'target_width must be >= width'):
      padded.eval(feed_dict={placeholder: image})
    # Target height (1) is smaller than the actual height (2).
    padded = utils.pad_to_bounding_box(placeholder, 0, 0, 1, 2, 255)
    with self.assertRaisesWithPredicateMatch(
        errors.InvalidArgumentError, 'target_height must be >= height'):
      padded.eval(feed_dict={placeholder: image})
def testDieIfImageTensorRankIsNotThree(self):
  """pad_to_bounding_box must reject a non-rank-3 image tensor."""
  # np.vstack here produces a rank-2 array, not a valid H x W x C image.
  flat_image = np.vstack([[5, 6], [9, 0]])
  with self.test_session():
    placeholder = tf.placeholder(tf.float32)
    padded = utils.pad_to_bounding_box(placeholder, 0, 0, 2, 2, 255)
    with self.assertRaisesWithPredicateMatch(
        errors.InvalidArgumentError, 'Wrong image tensor rank'):
      padded.eval(feed_dict={placeholder: flat_image})
def testReturnOriginalImageWhenTargetSizeIsEqualToImageSize(self):
  """Padding a 2x2 image to a 2x2 target with zero offsets is a no-op."""
  image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
  with self.test_session():
    placeholder = tf.placeholder(tf.float32)
    padded = utils.pad_to_bounding_box(placeholder, 0, 0, 2, 2, 255)
    result = padded.eval(feed_dict={placeholder: image})
    self.assertAllClose(result, image)
def testDieIfTargetSizeNotPossibleWithGivenOffset(self):
  """pad_to_bounding_box must reject offsets that push past the target."""
  image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
  with self.test_session():
    placeholder = tf.placeholder(tf.float32)
    # offset_height (3) + image height (2) exceeds target_height (4).
    padded = utils.pad_to_bounding_box(placeholder, 3, 0, 4, 4, 255)
    with self.assertRaisesWithPredicateMatch(
        errors.InvalidArgumentError,
        'target size not possible with the given target offsets'):
      padded.eval(feed_dict={placeholder: image})
def testReturnPaddedImageWithNonZeroPadValue(self):
  """Padding fills the border with the given pad value, per dtype."""
  for dtype in [np.int32, np.int64, np.float32, np.float64]:
    image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]]).astype(dtype)
    # 2x2 image placed at row offset 2, column offset 1 in a 5x5 canvas
    # whose remaining pixels are all 255.
    expected_image = np.dstack(
        [[[255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255],
          [255, 5, 6, 255, 255],
          [255, 9, 0, 255, 255],
          [255, 255, 255, 255, 255]],
         [[255, 255, 255, 255, 255],
          [255, 255, 255, 255, 255],
          [255, 4, 3, 255, 255],
          [255, 3, 5, 255, 255],
          [255, 255, 255, 255, 255]]]).astype(dtype)
    with self.test_session():
      placeholder = tf.placeholder(tf.float32)
      padded = utils.pad_to_bounding_box(placeholder, 2, 1, 5, 5, 255)
      self.assertAllClose(
          padded.eval(feed_dict={placeholder: image}), expected_image)
def preprocess_image_and_label(image,
                               label,
                               model_input_height,
                               model_input_width,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    model_input_height: The height of the input feed to model extractor.
    model_input_width: The width of the input feed to model extractor.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.

  Returns:
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label, or None if no label
      was provided.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')

  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Data augmentation by randomly scaling the inputs.
  if is_training:
    scale = utils.get_random_scale(min_scale_factor, max_scale_factor,
                                   scale_factor_step_size)
    processed_image, label = utils.randomly_scale_image_and_label(
        processed_image, label, scale)

  processed_image.set_shape([None, None, 3])

  if not is_training:
    processed_image, label = utils.resize_to_target(
        processed_image, label, model_input_height, model_input_width)

  # Pad image and label so that both spatial dimensions are at least
  # [model_input_height, model_input_width]; an already-large dimension is
  # left unchanged.
  image_shape = tf.shape(processed_image)
  target_height = tf.maximum(image_shape[0], model_input_height)
  target_width = tf.maximum(image_shape[1], model_input_width)

  # Pad image with the mean pixel value and the label with ignore_label so
  # padded label pixels do not contribute to training/evaluation.
  mean_pixel = tf.reshape(MEAN_UINT_PIXEL, [1, 1, 3])
  processed_image = utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)
  if label is not None:
    label = utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop the image and label down to the model input size.
  if is_training and label is not None:
    processed_image, label = utils.random_crop(
        [processed_image, label], model_input_height, model_input_width)

  processed_image.set_shape([model_input_height, model_input_width, 3])
  if label is not None:
    label.set_shape([model_input_height, model_input_width, 1])

  if is_training:
    # Randomly left-right flip the image and label together.
    processed_image, label, _ = utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  processed_image = _preprocess_zero_mean_unit_range(processed_image)
  return processed_image, label