Пример #1
0
 def graph_fn():
     """Build keypoint weights from a visibility mask and per-keypoint weights.

     Returns:
       The result of `keypoint_ops.keypoint_weights_from_visibilities` for a
       fixed 2x3 boolean visibility constant and the weights [1.0, 2.0, 3.0].
     """
     visibility_mask = tf.constant([[True, True, False],
                                    [False, True, False]])
     type_weights = [1.0, 2.0, 3.0]
     return keypoint_ops.keypoint_weights_from_visibilities(
         visibility_mask, type_weights)
Пример #2
0
    def test_keypoint_weights_from_visibilities_no_per_kpt_weights(self):
        """Visibilities alone should yield weight 1.0 where True, 0.0 where False."""
        # Expected output mirrors the boolean mask below, cast to floats.
        expected_weights = [[1.0, 1.0, 0.0], [0.0, 1.0, 0.0]]

        visibility_mask = tf.constant([[True, True, False],
                                       [False, True, False]])
        # No per-keypoint weights are supplied, exercising the default path.
        weights_tensor = keypoint_ops.keypoint_weights_from_visibilities(
            visibility_mask)

        with self.test_session() as sess:
            actual_weights = sess.run(weights_tensor)
            self.assertAllClose(expected_weights, actual_weights)
Пример #3
0
 def graph_fn():
     """Build keypoint weights from a visibility mask only.

     Returns:
       The result of `keypoint_ops.keypoint_weights_from_visibilities` for a
       fixed 2x3 boolean visibility constant, with no per-keypoint weights
       passed.
     """
     visibility_mask = tf.constant([[True, True, False],
                                    [False, True, False]])
     return keypoint_ops.keypoint_weights_from_visibilities(visibility_mask)
Пример #4
0
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False,
                         keypoint_type_weight=None):
    """A single function that is responsible for all input data transformations.

    Data transformation functions are applied in the following order.
    1. If key fields.InputDataFields.image_additional_channels is present in
       tensor_dict, the additional channels will be merged into
       fields.InputDataFields.image.
    2. data_augmentation_fn (optional): applied on tensor_dict.
    3. model_preprocess_fn: applied only on image tensor in tensor_dict.
    4. keypoint_type_weight (optional): If groundtruth keypoints are in
       the tensor dictionary, per-keypoint weights are produced. These weights
       are initialized by `keypoint_type_weight` (or ones if left None).
       Then, for all keypoints that are not visible, the weights are set to 0
       (to avoid penalizing the model in a loss function).
    5. image_resizer_fn: applied on original image and instance mask tensor in
       tensor_dict.
    6. one_hot_encoding: applied to classes tensor in tensor_dict.
    7. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
       same they can be merged into a single box with an associated k-hot class
       label.

    The input `tensor_dict` itself is not modified; all changes are made on a
    shallow copy. The entries fields.InputDataFields.image and
    fields.InputDataFields.groundtruth_classes are read unconditionally and
    must therefore be present.

    Args:
      tensor_dict: dictionary containing input tensors keyed by
        fields.InputDataFields.
      model_preprocess_fn: model's preprocess function to apply on image
        tensor. This function must take in a 4-D float tensor and return a 4-D
        preprocess float tensor and a tensor containing the true image shape.
      image_resizer_fn: image resizer function to apply on groundtruth instance
        masks. This function must take a 3-D float tensor of an image and a 3-D
        tensor of instance masks and return a resized version of these along
        with the true shapes.
      num_classes: number of max classes to one-hot (or k-hot) encode the class
        labels.
      data_augmentation_fn: (optional) data augmentation function to apply on
        input `tensor_dict`.
      merge_multiple_boxes: (optional) whether to merge multiple groundtruth
        boxes and classes for a given image if the boxes are exactly the same.
      retain_original_image: (optional) whether to retain original image in the
        output dictionary.
      use_multiclass_scores: whether to use multiclass scores as class targets
        instead of one-hot encoding of `groundtruth_classes`. When
        this is True and multiclass_scores is empty, one-hot encoding of
        `groundtruth_classes` is used as a fallback.
      use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
      retain_original_image_additional_channels: (optional) Whether to retain
        original image additional channels in the output dictionary.
      keypoint_type_weight: A list (of length num_keypoints) containing
        groundtruth loss weights to use for each keypoint. If None, will use a
        weight of 1.

    Returns:
      A dictionary keyed by fields.InputDataFields containing the tensors
      obtained after applying all the transformations.
    """
    # Short alias; the field-name constants are referenced on almost every line.
    input_fields = fields.InputDataFields

    # Work on a shallow copy so the caller's dictionary is left untouched.
    out_tensor_dict = tensor_dict.copy()
    if input_fields.multiclass_scores in out_tensor_dict:
        out_tensor_dict[input_fields.multiclass_scores] = (
            _multiclass_scores_or_one_hot_labels(
                out_tensor_dict[input_fields.multiclass_scores],
                out_tensor_dict[input_fields.groundtruth_boxes],
                out_tensor_dict[input_fields.groundtruth_classes],
                num_classes))

    if input_fields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
            out_tensor_dict)
        out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

    # The original image is captured before channel merging and augmentation.
    if retain_original_image:
        out_tensor_dict[input_fields.original_image] = tf.cast(
            image_resizer_fn(out_tensor_dict[input_fields.image], None)[0],
            tf.uint8)

    if input_fields.image_additional_channels in out_tensor_dict:
        channels = out_tensor_dict[input_fields.image_additional_channels]
        out_tensor_dict[input_fields.image] = tf.concat(
            [out_tensor_dict[input_fields.image], channels], axis=2)
        if retain_original_image_additional_channels:
            out_tensor_dict[input_fields.image_additional_channels] = tf.cast(
                image_resizer_fn(channels, None)[0], tf.uint8)

    # Apply data augmentation ops.
    if data_augmentation_fn is not None:
        out_tensor_dict = data_augmentation_fn(out_tensor_dict)

    # Apply model preprocessing ops and resize instance masks.
    image = out_tensor_dict[input_fields.image]
    preprocessed_resized_image, true_image_shape = model_preprocess_fn(
        tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))

    preprocessed_shape = tf.shape(preprocessed_resized_image)
    new_height, new_width = preprocessed_shape[1], preprocessed_shape[2]

    # Ratio of the preprocessed size to the true image size along dimensions
    # 1 and 2; used below as the new coordinate frame for boxes and keypoints.
    # tf.cast is used rather than the deprecated tf.to_float.
    im_box = tf.stack([
        0.0, 0.0,
        tf.cast(new_height, tf.float32) /
        tf.cast(true_image_shape[0, 0], tf.float32),
        tf.cast(new_width, tf.float32) /
        tf.cast(true_image_shape[0, 1], tf.float32)
    ])

    # NOTE: membership is tested against the input `tensor_dict`, while the
    # values are read from (and written to) `out_tensor_dict`.
    if input_fields.groundtruth_boxes in tensor_dict:
        bboxes = out_tensor_dict[input_fields.groundtruth_boxes]
        boxlist = box_list.BoxList(bboxes)
        realigned_bboxes = box_list_ops.change_coordinate_frame(
            boxlist, im_box)
        out_tensor_dict[input_fields.groundtruth_boxes] = (
            realigned_bboxes.get())

    if input_fields.groundtruth_keypoints in tensor_dict:
        keypoints = out_tensor_dict[input_fields.groundtruth_keypoints]
        realigned_keypoints = keypoint_ops.change_coordinate_frame(
            keypoints, im_box)
        out_tensor_dict[input_fields.groundtruth_keypoints] = (
            realigned_keypoints)
        flds_gt_kpt = input_fields.groundtruth_keypoints
        flds_gt_kpt_vis = input_fields.groundtruth_keypoint_visibilities
        flds_gt_kpt_weights = input_fields.groundtruth_keypoint_weights
        # Without explicit visibilities every keypoint is treated as visible.
        if flds_gt_kpt_vis not in out_tensor_dict:
            out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
                out_tensor_dict[flds_gt_kpt][:, :, 0], dtype=tf.bool)
        out_tensor_dict[flds_gt_kpt_weights] = (
            keypoint_ops.keypoint_weights_from_visibilities(
                out_tensor_dict[flds_gt_kpt_vis], keypoint_type_weight))

    if use_bfloat16:
        preprocessed_resized_image = tf.cast(preprocessed_resized_image,
                                             tf.bfloat16)
    # Drop the batch dimension that was added for model_preprocess_fn.
    out_tensor_dict[input_fields.image] = tf.squeeze(
        preprocessed_resized_image, axis=0)
    out_tensor_dict[input_fields.true_image_shape] = tf.squeeze(
        true_image_shape, axis=0)
    if input_fields.groundtruth_instance_masks in out_tensor_dict:
        masks = out_tensor_dict[input_fields.groundtruth_instance_masks]
        # Masks are resized together with the augmented (not preprocessed)
        # image; only the resized masks are kept.
        _, resized_masks, _ = image_resizer_fn(image, masks)
        if use_bfloat16:
            resized_masks = tf.cast(resized_masks, tf.bfloat16)
        out_tensor_dict[input_fields.groundtruth_instance_masks] = (
            resized_masks)

    # Class labels are shifted down by one before one-hot encoding.
    label_offset = 1
    zero_indexed_groundtruth_classes = (
        out_tensor_dict[input_fields.groundtruth_classes] - label_offset)
    if use_multiclass_scores:
        out_tensor_dict[input_fields.groundtruth_classes] = (
            out_tensor_dict[input_fields.multiclass_scores])
    else:
        out_tensor_dict[input_fields.groundtruth_classes] = tf.one_hot(
            zero_indexed_groundtruth_classes, num_classes)
    out_tensor_dict.pop(input_fields.multiclass_scores, None)

    if input_fields.groundtruth_confidences in out_tensor_dict:
        groundtruth_confidences = out_tensor_dict[
            input_fields.groundtruth_confidences]
        # Map the confidences to the one-hot encoding of classes
        out_tensor_dict[input_fields.groundtruth_confidences] = (
            tf.reshape(groundtruth_confidences, [-1, 1]) *
            out_tensor_dict[input_fields.groundtruth_classes])
    else:
        # No confidences supplied: use all-ones per-box confidences for the
        # optional merge step and the encoded classes as the stored value.
        groundtruth_confidences = tf.ones_like(
            zero_indexed_groundtruth_classes, dtype=tf.float32)
        out_tensor_dict[input_fields.groundtruth_confidences] = (
            out_tensor_dict[input_fields.groundtruth_classes])

    if merge_multiple_boxes:
        merged_boxes, merged_classes, merged_confidences, _ = (
            util_ops.merge_boxes_with_multiple_labels(
                out_tensor_dict[input_fields.groundtruth_boxes],
                zero_indexed_groundtruth_classes, groundtruth_confidences,
                num_classes))
        merged_classes = tf.cast(merged_classes, tf.float32)
        out_tensor_dict[input_fields.groundtruth_boxes] = merged_boxes
        out_tensor_dict[input_fields.groundtruth_classes] = merged_classes
        out_tensor_dict[input_fields.groundtruth_confidences] = (
            merged_confidences)
    if input_fields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict[input_fields.num_groundtruth_boxes] = tf.shape(
            out_tensor_dict[input_fields.groundtruth_boxes])[0]

    return out_tensor_dict