Example #1
def last_predict_part(boxes_encodings, classes_predictions_with_background, feature_maps, preprocessed_inputs):
    """Predicts unpostprocessed tensors from input tensor.

    This function takes an input batch of images and runs it through the forward
    pass of the network to yield unpostprocessesed predictions.

    A side effect of calling the predict method is that self._anchors is
    populated with a box_list.BoxList of anchors.  These anchors must be
    constructed before the postprocess or loss functions can be called.

    Args:
      boxes_encodings:
      classes_predictions_with_background:
      feature_maps:

      preprocessed_inputs: a [batch, height, width, channels] image tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros.

    """

    anchor_generator = anchor_generator_builder.build('ssd_anchor_generator')

    prediction_dict = post_processor(boxes_encodings, classes_predictions_with_background,
                                     feature_maps, anchor_generator.num_anchors_per_location())

    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)

    feature_map_spatial_dims = get_feature_map_spatial_dims(
        feature_maps)

    anchors = box_list_ops.concatenate(
        anchor_generator.generate(
            feature_map_spatial_dims,
            im_height=image_shape[1],
            im_width=image_shape[2]))

    box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)

    print("tf.concat box_encodings:", box_encodings.name)

    if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
        box_encodings = tf.squeeze(box_encodings, axis=2)

    class_predictions_with_background = tf.concat(
        prediction_dict['class_predictions_with_background'], axis=1)
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background':
            class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': anchors.get()
    }
    return predictions_dict, anchors
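All of these examples lean on shape_utils.combined_static_and_dynamic_shape, which returns a per-dimension list mixing Python ints (where the static shape is known) and scalar tensors (where it is not). A minimal sketch of that behavior, assuming the TF1-style graph mode used throughout and the usual object_detection import path:

import tensorflow as tf
from object_detection.utils import shape_utils

# Batch dimension unknown at graph-construction time, spatial dims static.
images = tf.placeholder(tf.float32, shape=[None, 300, 300, 3])
shape = shape_utils.combined_static_and_dynamic_shape(images)
# shape == [<scalar tensor>, 300, 300, 3]: the result can be indexed and fed
# to tf.stack / tf.reshape exactly as the examples on this page do.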
Example #2
        def _match_when_rows_are_empty():
            """Performs matching when the rows of similarity matrix are empty.

      When the rows are empty, all detections are false positives. So we return
      a tensor of -1's to indicate that the columns do not match to any rows.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
            similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)
            return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)
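A quick standalone check of what this branch produces, using a hypothetical empty-rows similarity matrix (no groundtruth rows, four anchor columns):

import tensorflow as tf

similarity_matrix = tf.zeros([0, 4])
matches = -1 * tf.ones([tf.shape(similarity_matrix)[1]], dtype=tf.int32)
with tf.Session() as sess:
    print(sess.run(matches))  # [-1 -1 -1 -1]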
Example #3
def get_feature_map_spatial_dims(feature_maps):
    """Return list of spatial dimensions for each feature map in a list.

    Args:
      feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].

    Returns:
      a list of pairs (height, width) for each feature map in feature_maps
    """
    feature_map_shapes = [
        shape_utils.combined_static_and_dynamic_shape(
            feature_map) for feature_map in feature_maps
    ]
    return [(shape[1], shape[2]) for shape in feature_map_shapes]
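A usage sketch, assuming get_feature_map_spatial_dims and its shape_utils import are in scope; with fully static shapes the helper returns plain ints:

import tensorflow as tf

feature_maps = [tf.zeros([8, 19, 19, 256]), tf.zeros([8, 10, 10, 256])]
print(get_feature_map_spatial_dims(feature_maps))  # [(19, 19), (10, 10)]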
Example #4
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0])
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered at the
    positions given by indices, with shape indices.shape + params.shape[1:].
  """
  with tf.name_scope(scope, 'MatMulGather'):
    params_shape = shape_utils.combined_static_and_dynamic_shape(params)
    indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
    params2d = tf.reshape(params, [params_shape[0], -1])
    indicator_matrix = tf.one_hot(indices, params_shape[0])
    gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
    return tf.reshape(gathered_result_flattened,
                      tf.stack(indices_shape + params_shape[1:]))
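A usage sketch showing that the one-hot/matmul trick reproduces tf.gather along axis 0, assuming the function above is in scope:

import tensorflow as tf

params = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
indices = tf.constant([2, 0], dtype=tf.int32)
gathered = matmul_gather_on_zeroth_axis(params, indices)
with tf.Session() as sess:
    print(sess.run(gathered))  # [[5. 6.], [1. 2.]], same as tf.gather(params, indices)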
Example #5
def _batch_decode_boxes(box_encodings, anchor_boxes):
    """Decodes box encodings with respect to the anchor boxes.

    Args:
      box_encodings: a 4-D tensor with shape
        [batch_size, num_anchors, num_classes, self._box_coder.code_size]
        representing box encodings.
      anchor_boxes: [batch_size, num_anchors, self._box_coder.code_size]
        representing decoded bounding boxes. If using a shared box across
        classes the shape will instead be
        [total_num_proposals, 1, self._box_coder.code_size].

    Returns:
      decoded_boxes: a
        [batch_size, num_anchors, num_classes, self._box_coder.code_size]
        float tensor representing bounding box predictions (for each image in
        batch, proposal and class). If using a shared box across classes the
        shape will instead be
        [batch_size, num_anchors, 1, self._box_coder.code_size].
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    num_classes = combined_shape[2]

    tiled_anchor_boxes = tf.tile(tf.expand_dims(anchor_boxes, 2),
                                 [1, 1, num_classes, 1])
    print("tiled_anchor_boxes:", tiled_anchor_boxes.name)
    tiled_anchors_boxlist = box_list.BoxList(
        tf.reshape(tiled_anchor_boxes, [-1, 4]))

    _proposal_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN', 'proposal')
    _box_coder = _proposal_target_assigner.box_coder

    decoded_boxes = _box_coder.decode(
        tf.reshape(box_encodings, [-1, _box_coder.code_size]),
        tiled_anchors_boxlist)

    print("combined_shape[0]:", combined_shape[0])
    print("combined_shape[1]:", combined_shape[1])
    print("num_classes:", num_classes)
    print("decoded_boxes.get():", decoded_boxes.get())

    decoded_boxes_reshape = tf.reshape(
        decoded_boxes.get(),
        tf.stack([combined_shape[0], combined_shape[1], num_classes, 4]))

    return decoded_boxes_reshape
Example #6
        def _match_when_rows_are_non_empty():
            """Performs matching when the rows of similarity matrix are non empty.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
            # Matches for each column
            matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            # Deal with matched and unmatched threshold
            if self._matched_threshold is not None:
                # Get logical indices of ignored and unmatched columns as tf.int64
                matched_vals = tf.reduce_max(similarity_matrix, 0)
                below_unmatched_threshold = tf.greater(
                    self._unmatched_threshold, matched_vals)
                between_thresholds = tf.logical_and(
                    tf.greater_equal(matched_vals, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, matched_vals))

                if self._negatives_lower_than_unmatched:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -1)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -2)
                else:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -2)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -1)

            if self._force_match_for_each_row:
                similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                    similarity_matrix)
                force_match_column_ids = tf.argmax(similarity_matrix,
                                                   1,
                                                   output_type=tf.int32)
                force_match_column_indicators = tf.one_hot(
                    force_match_column_ids, depth=similarity_matrix_shape[1])
                force_match_row_ids = tf.argmax(force_match_column_indicators,
                                                0,
                                                output_type=tf.int32)
                force_match_column_mask = tf.cast(
                    tf.reduce_max(force_match_column_indicators, 0), tf.bool)
                final_matches = tf.where(force_match_column_mask,
                                         force_match_row_ids, matches)
                return final_matches
            else:
                return matches
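A numeric sketch of the threshold bookkeeping above, using made-up thresholds matched=0.7 and unmatched=0.3: columns below 0.3 become negatives, columns in [0.3, 0.7) are ignored:

import tensorflow as tf

matched_vals = tf.constant([0.9, 0.5, 0.1])
below_unmatched_threshold = tf.greater(0.3, matched_vals)  # [False, False, True]
between_thresholds = tf.logical_and(
    tf.greater_equal(matched_vals, 0.3),
    tf.greater(0.7, matched_vals))                         # [False, True, False]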
Example #7
def _flatten_first_two_dimensions(inputs):
    """Flattens `K-d` tensor along batch dimension to be a `(K-1)-d` tensor.

    Converts `inputs` with shape [A, B, ..., depth] into a tensor of shape
    [A * B, ..., depth].

    Args:
      inputs: A float tensor with shape [A, B, ..., depth].  Note that the first
        two and last dimensions must be statically defined.
    Returns:
      A float tensor with shape [A * B, ..., depth] (where the first and last
        dimensions are statically defined).
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
    flattened_shape = tf.stack([combined_shape[0] * combined_shape[1]] +
                               combined_shape[2:])
    return tf.reshape(inputs, flattened_shape)
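A usage sketch, assuming _flatten_first_two_dimensions and shape_utils are in scope: a [2, 3, 4, 5] tensor collapses to [6, 4, 5]:

import tensorflow as tf

inputs = tf.zeros([2, 3, 4, 5])
print(_flatten_first_two_dimensions(inputs).shape)  # (6, 4, 5)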
Example #8
def _batch_decode(anchors, box_encodings):
    """Decodes a batch of box encodings with respect to the anchors.

    Args:
      anchors: a box_list.BoxList of anchors with respect to which the box
        encodings are decoded.
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if present in the input `box_encodings`, None otherwise.
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = combined_shape[0]
    tiled_anchor_boxes = tf.tile(
        tf.expand_dims(anchors.get(), 0), [batch_size, 1, 1])
    tiled_anchors_boxlist = box_list.BoxList(
        tf.reshape(tiled_anchor_boxes, [-1, 4]))

    box_coder = box_coder_builder.build("faster_rcnn_box_coder")

    decoded_boxes = box_coder.decode(
        tf.reshape(box_encodings, [-1, box_coder.code_size]),
        tiled_anchors_boxlist)
    decoded_keypoints = None
    if decoded_boxes.has_field(fields.BoxListFields.keypoints):
        decoded_keypoints = decoded_boxes.get_field(
            fields.BoxListFields.keypoints)
        num_keypoints = decoded_keypoints.get_shape()[1]
        decoded_keypoints = tf.reshape(
            decoded_keypoints,
            tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
    decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack(
        [combined_shape[0], combined_shape[1], 4]))
    return decoded_boxes, decoded_keypoints
Example #9
def _compute_clip_window(preprocessed_images, true_image_shapes):
    """Computes clip window to use during post_processing.

    Computes a new clip window to use during post-processing based on
    `resized_image_shapes` and `true_image_shapes` only if `preprocess` method
    has been called. Otherwise returns a default clip window of [0, 0, 1, 1].

    Args:
      preprocessed_images: the [batch, height, width, channels] image
          tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes
        of true images in the resized images, as resized images can be padded
        with zeros. Or None if the clip window should cover the full image.

    Returns:
      a 2-D float32 tensor of the form [batch_size, 4] containing the clip
      window for each image in the batch in normalized coordinates (relative to
      the resized dimensions) where each clip window is of the form [ymin, xmin,
      ymax, xmax] or a default clip window of [0, 0, 1, 1].

    """
    if true_image_shapes is None:
        return tf.constant([0, 0, 1, 1], dtype=tf.float32)

    resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_images)
    true_heights, true_widths, _ = tf.unstack(
        tf.to_float(true_image_shapes), axis=1)
    padded_height = tf.to_float(resized_inputs_shape[1])
    padded_width = tf.to_float(resized_inputs_shape[2])
    return tf.stack(
        [
            tf.zeros_like(true_heights),
            tf.zeros_like(true_widths),
            true_heights / padded_height,
            true_widths / padded_width
        ],
        axis=1)
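A worked example of the window math, assuming _compute_clip_window above is in scope: a 200x300 true image padded to 300x300 yields the normalized window [0, 0, 200/300, 1]:

import tensorflow as tf

preprocessed_images = tf.zeros([1, 300, 300, 3])
true_image_shapes = tf.constant([[200, 300, 3]], dtype=tf.int32)
clip_window = _compute_clip_window(preprocessed_images, true_image_shapes)
with tf.Session() as sess:
    print(sess.run(clip_window))  # [[0. 0. 0.6666667 1.]]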
Example #10
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Returns a regression target for each anchor.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth_boxes
          match: a matcher.Match object

        Returns:
          reg_targets: a float32 tensor with shape [N, box_code_dimension]
        """
        matched_gt_boxes = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
        if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
            groundtruth_keypoints = groundtruth_boxes.get_field(
                fields.BoxListFields.keypoints)
            matched_keypoints = match.gather_based_on_match(
                groundtruth_keypoints,
                unmatched_value=tf.zeros(
                    groundtruth_keypoints.get_shape()[1:]),
                ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
            matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
                                         matched_keypoints)
        matched_reg_targets = self._box_coder.encode(matched_gt_boxlist,
                                                     anchors)
        match_results_shape = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)

        # Zero out the unmatched and ignored regression targets.
        unmatched_ignored_reg_targets = tf.tile(
            self._default_regression_target(), [match_results_shape[0], 1])
        matched_anchors_mask = match.matched_column_indicator()
        reg_targets = tf.where(matched_anchors_mask, matched_reg_targets,
                               unmatched_ignored_reg_targets)
        return reg_targets
Example #11
def nearest_neighbor_upsampling(input_tensor, scale):
  """Nearest neighbor upsampling implementation.

  Nearest neighbor upsampling function that maps input tensor with shape
  [batch_size, height, width, channels] to [batch_size, height * scale
  , width * scale, channels]. This implementation only uses reshape and
  broadcasting to make it TPU compatible.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer multiple to scale resolution of input data.
  Returns:
    data_up: A float32 tensor of size
      [batch, height_in*scale, width_in*scale, channels].
  """
  with tf.name_scope('nearest_neighbor_upsampling'):
    (batch_size, height, width,
     channels) = shape_utils.combined_static_and_dynamic_shape(input_tensor)
    output_tensor = tf.reshape(
        input_tensor, [batch_size, height, 1, width, 1, channels]) * tf.ones(
            [1, 1, scale, 1, scale, 1], dtype=input_tensor.dtype)
    return tf.reshape(output_tensor,
                      [batch_size, height * scale, width * scale, channels])
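A usage sketch, assuming nearest_neighbor_upsampling above is in scope: a 1x2x2x1 input with scale=2 repeats every pixel into a 2x2 block:

import tensorflow as tf

x = tf.constant([[[[1.], [2.]], [[3.], [4.]]]])
up = nearest_neighbor_upsampling(x, scale=2)
with tf.Session() as sess:
    print(sess.run(up)[0, :, :, 0])
    # [[1. 1. 2. 2.]
    #  [1. 1. 2. 2.]
    #  [3. 3. 4. 4.]
    #  [3. 3. 4. 4.]]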
Example #12
def _postprocess_rpn(rpn_box_encodings_batch,
                     rpn_objectness_predictions_with_background_batch, anchors,
                     image_shapes, first_stage_max_proposals):
    """Converts first stage prediction tensors from the RPN to proposals.

    This function decodes the raw RPN predictions, runs non-max suppression
    on the result.

    Note that the behavior of this function is slightly modified during
    training --- specifically, we stop the gradient from passing through the
    proposal boxes and we only return a balanced sampled subset of proposals
    with size `second_stage_batch_size`.

    Args:
      rpn_box_encodings_batch: A 3-D float32 tensor of shape
        [batch_size, num_anchors, self._box_coder.code_size] containing
        predicted proposal box encodings.
      rpn_objectness_predictions_with_background_batch: A 3-D float tensor of
        shape [batch_size, num_anchors, 2] containing objectness predictions
        (logits) for each of the anchors with 0 corresponding to background
        and 1 corresponding to object.
      anchors: A 2-D tensor of shape [num_anchors, 4] representing anchors
        for the first stage RPN.  Note that `num_anchors` can differ depending
        on whether the model is created in training or inference mode.
      image_shapes: A 2-D tensor of shape [batch, 3] containing the shapes of
        images in the batch.

    Returns:
      proposal_boxes: A float tensor with shape
        [batch_size, max_num_proposals, 4] representing the (potentially zero
        padded) proposal boxes for all images in the batch.  These boxes are
        represented as normalized coordinates.
      proposal_scores:  A float tensor with shape
        [batch_size, max_num_proposals] representing the (potentially zero
        padded) proposal objectness scores for all images in the batch.
      num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
        representing the number of proposals predicted for each image in
        the batch.
    """
    first_stage_nms_score_threshold = 0.0
    first_stage_nms_iou_threshold = 0.7

    rpn_box_encodings_batch = tf.expand_dims(rpn_box_encodings_batch, axis=2)

    print("rpn_box_encodings_batch name:", rpn_box_encodings_batch.name)
    print("rpn_box_encodings_batch: shape", rpn_box_encodings_batch.shape)
    rpn_encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        rpn_box_encodings_batch)
    tiled_anchor_boxes = tf.tile(tf.expand_dims(anchors, 0),
                                 [rpn_encodings_shape[0], 1, 1])
    print("_batch_decode_boxes 1")
    proposal_boxes = _batch_decode_boxes(rpn_box_encodings_batch,
                                         tiled_anchor_boxes)

    proposal_boxes = tf.squeeze(proposal_boxes, axis=2)
    rpn_objectness_softmax_without_background = tf.nn.softmax(
        rpn_objectness_predictions_with_background_batch)[:, :, 1]
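    # NOTE: this call expects a one-argument clip-window helper that builds an
    # absolute [0, 0, height, width] window per image from `image_shapes`; the
    # two-argument _compute_clip_window of Example #9 has a different signature
    # and returns normalized windows.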
    clip_window = _compute_clip_window(image_shapes)

    (proposal_boxes, proposal_scores, _, _, _,
     num_proposals) = post_processing.batch_multiclass_non_max_suppression(
         tf.expand_dims(proposal_boxes, axis=2),
         tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
         first_stage_nms_score_threshold,
         first_stage_nms_iou_threshold,
         first_stage_max_proposals,
         first_stage_max_proposals,
         clip_window=clip_window)

    # normalize proposal boxes
    def normalize_boxes(args):
        proposal_boxes_per_image = args[0]
        image_shape = args[1]
        normalized_boxes_per_image = box_list_ops.to_normalized_coordinates(
            box_list.BoxList(proposal_boxes_per_image),
            image_shape[0],
            image_shape[1],
            check_range=False).get()
        return normalized_boxes_per_image

    normalized_proposal_boxes = shape_utils.static_or_dynamic_map_fn(
        normalize_boxes,
        elems=[proposal_boxes, image_shapes],
        dtype=tf.float32)
    return normalized_proposal_boxes, proposal_scores, num_proposals
Example #13
def second_stage_box_predictor(preprocessed_inputs, box_encoding_reshape,
                               class_prediction_reshape, rpn_features_to_crop,
                               rpn_box_encodings,
                               rpn_objectness_predictions_with_background,
                               true_image_shapes, rpn_box_predictor_features):
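    """Generates first-stage proposals from the RPN outputs, assembles the
    second-stage prediction dictionary and runs second_postprocess on it,
    returning the postprocessed detections.
    """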

    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    first_stage_anchor_generator = anchor_generator_builder.build(
        "grid_anchor_generator")
    # The Faster R-CNN paper recommends pruning anchors that venture outside
    # the image window at training time and clipping at inference time.
    clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]]))
    feature_map_shape = tf.shape(rpn_features_to_crop)

    anchors_boxlist = box_list_ops.concatenate(
        first_stage_anchor_generator.generate([(feature_map_shape[1],
                                                feature_map_shape[2])]))
    anchors_boxlist = box_list_ops.clip_to_window(anchors_boxlist, clip_window)
    _anchors = anchors_boxlist

    print("second_stage_box_predictor _postprocess_rpn")

    image_shape_2d = _image_batch_shape_2d(image_shape)

    num_anchors_per_location = (
        first_stage_anchor_generator.num_anchors_per_location())

    if len(num_anchors_per_location) != 1:
        raise RuntimeError('anchor_generator is expected to generate anchors '
                           'corresponding to a single feature map.')
    box_predictions = _first_stage_box_predictor_predict(
        [rpn_box_predictor_features], [rpn_box_encodings],
        [rpn_objectness_predictions_with_background], num_anchors_per_location)

    predictions_box_encodings = tf.concat(box_predictions[BOX_ENCODINGS],
                                          axis=1)

    print("squeeze predictions_box_encodings.shape:",
          predictions_box_encodings.shape)

    rpn_box_encodings = tf.squeeze(predictions_box_encodings, axis=2)

    print("rpn_box_encodings.shape:", rpn_box_encodings.shape)

    rpn_objectness_predictions_with_background = tf.concat(
        box_predictions[CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)

    proposal_boxes_normalized, _, num_proposals = _postprocess_rpn(
        rpn_box_encodings,
        rpn_objectness_predictions_with_background,
        _anchors.get(),
        image_shape_2d,
        first_stage_max_proposals=100)

    prediction_dict = {
        'rpn_box_predictor_features':
        rpn_box_predictor_features,
        'rpn_features_to_crop':
        rpn_features_to_crop,
        'image_shape':
        image_shape,
        'rpn_box_encodings':
        rpn_box_encodings,
        'rpn_objectness_predictions_with_background':
        rpn_objectness_predictions_with_background,
    }

    refined_box_encodings = tf.squeeze(box_encoding_reshape,
                                       axis=1,
                                       name='all_refined_box_encodings')
    class_predictions_with_background = tf.squeeze(
        class_prediction_reshape,
        axis=1,
        name='all_class_predictions_with_background')
    _parallel_iterations = 16
    absolute_proposal_boxes = ops.normalized_to_image_coordinates(
        proposal_boxes_normalized, image_shape, _parallel_iterations)

    prediction_dict1 = {
        'refined_box_encodings': refined_box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'num_proposals': num_proposals,
        'proposal_boxes': absolute_proposal_boxes,
    }

    prediction_dict.update(prediction_dict1)

    result_output = second_postprocess(prediction_dict, true_image_shapes)

    return result_output
Example #14
def _first_stage_box_predictor_predict(image_features, box_encodings,
                                       class_predictions_with_backgrounds,
                                       num_predictions_per_locations):
    """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels_i] containing features for a batch of images.
      box_encodings: A list of float tensors, one per feature map, holding the
        raw box encodings to be reshaped to
        [batch_size, num_anchors_i, 1, code_size].
      class_predictions_with_backgrounds: A list of float tensors, one per
        feature map, holding the raw class predictions to be reshaped to
        [batch_size, num_anchors_i, num_classes + 1].
      num_predictions_per_locations: A list of integers representing the number
        of box predictions to be made per spatial location for each feature
        map.

    Returns:
      A dictionary mapping BOX_ENCODINGS to a list of float tensors of shape
        [batch_size, num_anchors_i, q, code_size] representing the location of
        the objects (where q is 1 or the number of classes), and
        CLASS_PREDICTIONS_WITH_BACKGROUND to a list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in either list corresponds
        to a feature map in the input `image_features` list.
    """
    box_encodings_list = []
    class_predictions_list = []
    num_classes = 1
    num_class_slots = num_classes + 1
    _proposal_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN', 'proposal')
    _box_coder = _proposal_target_assigner.box_coder
    _box_code_size = _box_coder.code_size

    for (image_feature, box_encoding, class_predictions_with_background,
         num_predictions_per_location) in zip(
             image_features, box_encodings, class_predictions_with_backgrounds,
             num_predictions_per_locations):
        combined_feature_map_shape = (
            shape_utils.combined_static_and_dynamic_shape(image_feature))
        print("_box_code_size:", _box_code_size)
        print("num_predictions_per_location:", num_predictions_per_location)
        print("combined_feature_map_shape[1]:", combined_feature_map_shape[1])
        print("combined_feature_map_shape[2]:", combined_feature_map_shape[2])
        print("box_encodings:", box_encoding.shape)

        shapes = tf.stack([
            combined_feature_map_shape[0], combined_feature_map_shape[1] *
            combined_feature_map_shape[2] * num_predictions_per_location, 1,
            _box_code_size
        ])

        box_encoding_reshape = tf.reshape(box_encoding, shapes)
        print("box_encoding_reshape:", box_encoding_reshape.shape)

        box_encodings_list.append(box_encoding_reshape)

        class_predictions_with_background = tf.reshape(
            class_predictions_with_background,
            tf.stack([
                combined_feature_map_shape[0], combined_feature_map_shape[1] *
                combined_feature_map_shape[2] * num_predictions_per_location,
                num_class_slots
            ]))

        class_predictions_list.append(class_predictions_with_background)

    return {
        BOX_ENCODINGS: box_encodings_list,
        CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
    }
Example #15
    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               groundtruth_weights=None,
               **params):
        """Assign classification and regression targets to each anchor.

        For a given set of anchors and groundtruth detections, match anchors
        to groundtruth_boxes and assign classification and regression targets
        to each anchor as well as weights based on the resulting match
        (specifying, e.g., which anchors should not contribute to training
        loss).

        Anchors that are not matched to anything are given a classification
        target of self._unmatched_cls_target, which can be specified via the
        constructor.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth boxes
          groundtruth_labels: a tensor of shape [M, d_1, ... d_k]
            with labels for each of the ground_truth boxes. The subshape
            [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When
            set to None, groundtruth_labels assumes a binary problem where all
            ground_truth boxes get a positive label (of 1).
          groundtruth_weights: a float tensor of shape [M] indicating the
            weight to assign to all anchors matched to a particular groundtruth
            box. The weights must be in [0., 1.]. If None, all weights are set
            to 1.
          **params: Additional keyword arguments for specific implementations
            of the Matcher.

        Returns:
          cls_targets: a float32 tensor with shape
            [num_anchors, d_1, d_2 ... d_k], where the subshape [d_1, ..., d_k]
            is compatible with groundtruth_labels, which has shape
            [num_gt_boxes, d_1, d_2, ... d_k].
          cls_weights: a float32 tensor with shape [num_anchors]
          reg_targets: a float32 tensor with shape
            [num_anchors, box_code_dimension]
          reg_weights: a float32 tensor with shape [num_anchors]
          match: a matcher.Match object encoding the match between anchors and
            groundtruth boxes, with rows corresponding to groundtruth boxes
            and columns corresponding to anchors.

        Raises:
          ValueError: if anchors or groundtruth_boxes are not of type
            box_list.BoxList
        """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be a BoxList')
        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be a BoxList')

        if groundtruth_labels is None:
            groundtruth_labels = tf.ones(
                tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
            groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
        unmatched_shape_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)
            [1:],
            shape_utils.combined_static_and_dynamic_shape(
                self._unmatched_cls_target))
        labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
            shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)
            [:1],
            shape_utils.combined_static_and_dynamic_shape(
                groundtruth_boxes.get())[:1])

        if groundtruth_weights is None:
            num_gt_boxes = groundtruth_boxes.num_boxes_static()
            if not num_gt_boxes:
                num_gt_boxes = groundtruth_boxes.num_boxes()
            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
        with tf.control_dependencies(
            [unmatched_shape_assert, labels_and_box_shapes_assert]):
            match_quality_matrix = self._similarity_calc.compare(
                groundtruth_boxes, anchors)
            match = self._matcher.match(match_quality_matrix, **params)
            reg_targets = self._create_regression_targets(
                anchors, groundtruth_boxes, match)
            cls_targets = self._create_classification_targets(
                groundtruth_labels, match)
            reg_weights = self._create_regression_weights(
                match, groundtruth_weights)
            cls_weights = self._create_classification_weights(
                match, groundtruth_weights)

        num_anchors = anchors.num_boxes_static()
        if num_anchors is not None:
            reg_targets = self._reset_target_shape(reg_targets, num_anchors)
            cls_targets = self._reset_target_shape(cls_targets, num_anchors)
            reg_weights = self._reset_target_shape(reg_weights, num_anchors)
            cls_weights = self._reset_target_shape(cls_weights, num_anchors)

        return cls_targets, cls_weights, reg_targets, reg_weights, match
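A usage sketch for assign(), reusing the create_target_assigner('FasterRCNN', 'proposal') call that already appears in Examples #5 and #14; the import paths are the standard object_detection ones and the boxes are made up:

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import target_assigner

assigner = target_assigner.create_target_assigner('FasterRCNN', 'proposal')
anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
                                        [0.0, 0.0, 0.5, 0.5]]))
groundtruth = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
cls_targets, cls_weights, reg_targets, reg_weights, match = assigner.assign(
    anchors, groundtruth)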