def test_die_on_tensor_shape_with_rank_three(self):
     """Checks that each static_shape accessor rejects a rank-3 shape.

     Every accessor gets its own assertRaises context. Inside a single
     shared context the first call that raises ends the block, so the
     accessors listed after it would never actually be exercised.
     """
     tensor_shape = tf.TensorShape(dims=[32, 299, 384])
     accessors = (
         static_shape.get_batch_size,
         static_shape.get_height,
         static_shape.get_width,
         static_shape.get_depth,
     )
     for accessor in accessors:
         with self.assertRaises(ValueError):
             accessor(tensor_shape)
Exemplo n.º 2
0
def pad_to_multiple(tensor, multiple):
  """Zero-pads the height and width of a tensor up to a given multiple.

  Zeros are appended at the end of dimensions 1 and 2 (height and width) until
  each of them is a multiple of `multiple`. For example, an input of shape
  [1, 3, 5, 1] with a multiple of 4 yields a tensor of shape [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
            tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """

  def _size_and_rounded_size(static_dim, axis):
    """Returns (size, size rounded up to `multiple`) for one spatial axis."""
    if static_dim is not None:
      # Statically known size: do the rounding in plain Python.
      rounded = int(math.ceil(float(static_dim) / multiple) * multiple)
      return static_dim, rounded
    # Size only known at run time: do the rounding with graph ops.
    dynamic_dim = tf.shape(tensor)[axis]
    rounded = tf.to_int32(
        tf.ceil(tf.to_float(dynamic_dim) / tf.to_float(multiple))) * multiple
    return dynamic_dim, rounded

  static_dims = tensor.get_shape()
  num_images = static_shape.get_batch_size(static_dims)
  static_height = static_shape.get_height(static_dims)
  static_width = static_shape.get_width(static_dims)
  num_channels = static_shape.get_depth(static_dims)

  if num_images is None:
    num_images = tf.shape(tensor)[0]

  in_height, out_height = _size_and_rounded_size(static_height, 1)
  in_width, out_width = _size_and_rounded_size(static_width, 2)

  if num_channels is None:
    num_channels = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape
  if out_height != in_height:
    extra_rows = tf.zeros(
        [num_images, out_height - in_height, in_width, num_channels])
    tensor = tf.concat([tensor, extra_rows], 1)
  if out_width != in_width:
    # The tensor has already been height-padded at this point, so the width
    # padding has to span the padded height.
    extra_cols = tf.zeros(
        [num_images, out_height, out_width - in_width, num_channels])
    tensor = tf.concat([tensor, extra_cols], 2)

  return tensor
Exemplo n.º 3
0
def check_min_image_dim(min_dim, image_tensor):
    """Verifies that an image's height and width are both at least `min_dim`.

    When both spatial dimensions are statically known, the comparison happens
    immediately, at graph construction time. When either of them is unknown,
    the comparison is deferred to run time by attaching an Assert op as a
    control dependency of the returned tensor.

    Args:
      min_dim: The minimum number of pixels along the width and height of the
               image.
      image_tensor: The image tensor to check size for.

    Returns:
      `image_tensor` itself when its height and width are static; otherwise an
      identity of `image_tensor` that depends on the run-time Assert.

    Raises:
      ValueError: if the static height or width of `image_tensor` is smaller
        than `min_dim`.
    """
    static_dims = image_tensor.get_shape()
    image_height = static_shape.get_height(static_dims)
    image_width = static_shape.get_width(static_dims)

    if image_height is not None and image_width is not None:
        # Both spatial sizes are known now, so validate eagerly.
        if image_height < min_dim or image_width < min_dim:
            raise ValueError(
                "image size must be >= %d in both height and width; image dim = %d,%d"
                % (min_dim, image_height, image_width))
        return image_tensor

    # At least one spatial size is dynamic: check when the graph executes.
    height_ok = tf.greater_equal(tf.shape(image_tensor)[1], min_dim)
    width_ok = tf.greater_equal(tf.shape(image_tensor)[2], min_dim)
    failure_data = [
        "image size must be >= {} in both height and width.".format(min_dim)
    ]
    shape_assert = tf.Assert(tf.logical_and(height_ok, width_ok), failure_data)
    with tf.control_dependencies([shape_assert]):
        return tf.identity(image_tensor)
  def _predict(self, image_features, num_predictions_per_location):
    """Builds the box-encoding and class-score heads for one feature map.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer representing the number of box
        predictions to be made per spatial location in the feature map.

    Returns:
      A dictionary containing the following tensors.
        box_encodings: A float tensor of shape [batch_size, num_anchors, 1,
          code_size] representing the location of the objects, where
          num_anchors = feat_height * feat_width * num_predictions_per_location
        class_predictions_with_background: A float tensor of shape
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.
    """
    input_shape = image_features.get_shape()

    # Clamp the input channel count into [self._min_depth, self._max_depth].
    features_depth = static_shape.get_depth(input_shape)
    depth = max(min(features_depth, self._max_depth), self._min_depth)

    # Add a slot for the background class.
    num_class_slots = self.num_classes + 1
    kernel_size = self._kernel_size

    net = image_features
    with slim.arg_scope(self._conv_hyperparams):
      with slim.arg_scope([slim.dropout], is_training=self._is_training):
        # Add additional conv layers before the predictor.
        if depth > 0 and self._num_layers_before_predictor > 0:
          for layer_index in range(self._num_layers_before_predictor):
            layer_scope = 'Conv2d_%d_1x1_%d' % (layer_index, depth)
            net = slim.conv2d(net, depth, [1, 1], scope=layer_scope)
        # The prediction convolutions run without activation or normalizer.
        with slim.arg_scope([slim.conv2d], activation_fn=None,
                            normalizer_fn=None, normalizer_params=None):
          box_encodings = slim.conv2d(
              net,
              num_predictions_per_location * self._box_code_size,
              [kernel_size, kernel_size],
              scope='BoxEncodingPredictor')
          # Dropout is applied after the box head is built, so it only feeds
          # the class head.
          if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
          class_predictions_with_background = slim.conv2d(
              net,
              num_predictions_per_location * num_class_slots,
              [kernel_size, kernel_size],
              scope='ClassPredictor')
          if self._apply_sigmoid_to_scores:
            class_predictions_with_background = tf.sigmoid(
                class_predictions_with_background)

    batch_size = static_shape.get_batch_size(input_shape)
    if batch_size is not None:
      # Static batch size: let reshape infer the anchor dimension.
      box_target_shape = [batch_size, -1, 1, self._box_code_size]
      class_target_shape = [batch_size, -1, num_class_slots]
    else:
      # Unknown batch size: spell out the anchor dimension from the static
      # height and width so that reshape can infer the batch dimension.
      features_height = static_shape.get_height(input_shape)
      features_width = static_shape.get_width(input_shape)
      flattened_predictions_size = (features_height * features_width *
                                    num_predictions_per_location)
      box_target_shape = [
          -1, flattened_predictions_size, 1, self._box_code_size]
      class_target_shape = [-1, flattened_predictions_size, num_class_slots]

    box_encodings = tf.reshape(box_encodings, box_target_shape)
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background, class_target_shape)

    predictions = {}
    predictions[BOX_ENCODINGS] = box_encodings
    predictions[CLASS_PREDICTIONS_WITH_BACKGROUND] = (
        class_predictions_with_background)
    return predictions
 def test_return_correct_height(self):
      """Checks that get_height returns entry 1 of a rank-4 shape."""
      shape_under_test = tf.TensorShape(dims=[32, 299, 384, 3])
      reported_height = static_shape.get_height(shape_under_test)
      self.assertEqual(299, reported_height)