Exemplo n.º 1
0
    def test_normalized_to_image_coordinates(self):
        """Checks normalized boxes are converted to absolute image coordinates.

        Two boxes are fed through `ops.normalized_to_image_coordinates` with an
        image shape tensor of [1, 4, 4, 3], and the result is compared against
        hand-written expected values.
        """
        # Plain NumPy fixtures: the boxes that get fed and the values expected.
        boxes_to_feed = np.array([
            [[0.0, 0.0, 1.0, 1.0]],
            [[0.5, 0.5, 1.0, 1.0]],
        ])
        expected_boxes = np.array([
            [[0, 0, 4, 4]],
            [[2, 2, 4, 4]],
        ])

        # The leading dimension is left unknown in the placeholder's shape.
        boxes_placeholder = tf.placeholder(tf.float32, shape=(None, 1, 4))
        image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
        absolute_boxes_op = ops.normalized_to_image_coordinates(
            boxes_placeholder, image_shape, parallel_iterations=2)

        with self.test_session() as sess:
            actual_boxes = sess.run(
                absolute_boxes_op,
                feed_dict={boxes_placeholder: boxes_to_feed})

        self.assertAllEqual(actual_boxes, expected_boxes)
Exemplo n.º 2
0
  def test_normalized_to_image_coordinates(self):
    """Verifies conversion of normalized boxes into absolute image coordinates.

    Two single-box entries are fed through a placeholder whose leading
    dimension is unknown; the image shape tensor is [1, 4, 4, 3].
    """
    fed_boxes = np.array([[[0.0, 0.0, 1.0, 1.0]], [[0.5, 0.5, 1.0, 1.0]]])
    expected_boxes = np.array([[[0, 0, 4, 4]], [[2, 2, 4, 4]]])

    normalized_boxes = tf.placeholder(tf.float32, shape=(None, 1, 4))
    image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
    conversion_op = ops.normalized_to_image_coordinates(
        normalized_boxes, image_shape, parallel_iterations=2)

    feeds = {normalized_boxes: fed_boxes}
    with self.test_session() as sess:
      computed_boxes = sess.run(conversion_op, feed_dict=feeds)

    self.assertAllEqual(computed_boxes, expected_boxes)
Exemplo n.º 3
0
  def _predict_second_stage(self, rpn_box_encodings,
                            rpn_objectness_predictions_with_background,
                            rpn_features,
                            anchors,
                            image_shape,
                            true_image_shapes):
    """Builds the second-stage (R-FCN) prediction tensors from RPN outputs.

    Args:
      rpn_box_encodings: float tensor of shape
        [batch_size, num_valid_anchors, self._box_coder.code_size] holding the
        boxes predicted by the RPN.
      rpn_objectness_predictions_with_background: float tensor of shape
        [batch_size, num_valid_anchors, 2] holding per-anchor class logits.
        Index 0 of the last dimension is the background class.
      rpn_features: 4-D float32 tensor of shape
        [batch_size, height, width, depth] with the RPN image features.
      anchors: 2-D float tensor of shape
        [num_anchors, self._box_coder.code_size].
      image_shape: 1-D int32 tensor of size [4] with the image shape.
      true_image_shapes: int32 tensor of shape [batch, 3]; each row is
        [height, width, channels] of the true image inside the (possibly
        zero-padded) resized image.

    Returns:
      A dictionary of "raw" prediction tensors with these keys:
        refined_box_encodings: tensor of shape
          [total_num_proposals, num_classes, 4] with the final refined box
          encodings, where
          total_num_proposals=batch_size*self._max_num_proposals.
        class_predictions_with_background: tensor of shape
          [total_num_proposals, num_classes + 1] with class logits; index 0
          is the background class.
        num_proposals: int32 tensor of shape [batch_size] with the number of
          proposals produced by the RPN for each image. It distinguishes real
          entries from zero padding, since proposals are always padded to
          `self.max_num_proposals` per image.
        proposal_boxes: float32 tensor of shape
          [batch_size, self.max_num_proposals, 4] with decoded proposal boxes
          in absolute coordinates.
        box_classifier_features: 4-D float32 tensor of shape
          [batch_size, feature_map_height, feature_map_width, depth].
        proposal_boxes_normalized: float32 tensor of shape
          [batch_size, self.max_num_proposals, 4] with decoded proposal boxes
          in normalized coordinates.
    """
    # Replicate the per-image [height, width, channels] row once per batch
    # entry so the RPN post-processing receives one shape row per image.
    per_image_shape = tf.expand_dims(image_shape[1:], 0)
    batch_size = image_shape[0]
    image_shape_2d = tf.tile(per_image_shape, [batch_size, 1])

    normalized_proposals, _, num_proposals = self._postprocess_rpn(
        rpn_box_encodings, rpn_objectness_predictions_with_background,
        anchors, image_shape_2d, true_image_shapes)

    classifier_features = (
        self._feature_extractor.extract_box_classifier_features(
            rpn_features,
            scope=self.second_stage_feature_extractor_scope))

    # Keras predictors are invoked as callables; the others go through
    # `predict` with an explicit variable scope.
    predictor = self._rfcn_box_predictor
    if predictor.is_keras_model:
      predictor_outputs = predictor(
          [classifier_features],
          proposal_boxes=normalized_proposals)
    else:
      predictor_outputs = predictor.predict(
          [classifier_features],
          num_predictions_per_location=[1],
          scope=self.second_stage_box_predictor_scope,
          proposal_boxes=normalized_proposals)

    # Each predictor output is a list of tensors: join along axis 1, then
    # drop that axis.
    joined_encodings = tf.concat(
        predictor_outputs[box_predictor.BOX_ENCODINGS], axis=1)
    box_encodings = tf.squeeze(joined_encodings, axis=1)
    joined_class_logits = tf.concat(
        predictor_outputs[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
        axis=1)
    class_logits = tf.squeeze(joined_class_logits, axis=1)

    absolute_proposals = ops.normalized_to_image_coordinates(
        normalized_proposals, image_shape,
        parallel_iterations=self._parallel_iterations)

    return {
        'refined_box_encodings': box_encodings,
        'class_predictions_with_background': class_logits,
        'num_proposals': num_proposals,
        'proposal_boxes': absolute_proposals,
        'box_classifier_features': classifier_features,
        'proposal_boxes_normalized': normalized_proposals,
    }
Exemplo n.º 4
0
    def _predict_second_stage(self, rpn_box_encodings,
                              rpn_objectness_predictions_with_background,
                              rpn_features, anchors, image_shape):
        """Builds the second-stage prediction tensors with the R-FCN predictor.

        Args:
          rpn_box_encodings: float tensor of shape
            [batch_size, num_valid_anchors, self._box_coder.code_size] holding
            the boxes predicted by the RPN.
          rpn_objectness_predictions_with_background: float tensor of shape
            [batch_size, num_valid_anchors, 2] holding per-anchor class
            logits. Index 0 of the last dimension is the background class.
          rpn_features: 4-D float32 tensor of shape
            [batch_size, height, width, depth] with the RPN image features.
          anchors: 2-D float tensor of shape
            [num_anchors, self._box_coder.code_size].
          image_shape: 1-D int32 tensor of size [4] with the image shape.

        Returns:
          A dictionary of "raw" prediction tensors with these keys:
            refined_box_encodings: tensor of shape
              [total_num_proposals, num_classes, 4] with the final refined
              box encodings, where
              total_num_proposals=batch_size*self._max_num_proposals.
            class_predictions_with_background: tensor of shape
              [total_num_proposals, num_classes + 1] with class logits;
              index 0 is the background class.
            num_proposals: int32 tensor of shape [batch_size] with the number
              of proposals produced by the RPN for each image. It
              distinguishes real entries from zero padding, since proposals
              are always padded to `self.max_num_proposals` per image.
            proposal_boxes: float32 tensor of shape
              [batch_size, self.max_num_proposals, 4] with decoded proposal
              boxes in absolute coordinates.
            box_classifier_features: 4-D float32 tensor of shape
              [batch_size, feature_map_height, feature_map_width, depth].
            proposal_boxes_normalized: float32 tensor of shape
              [batch_size, self.max_num_proposals, 4] with decoded proposal
              boxes in normalized coordinates.
        """
        normalized_proposals, _, num_proposals = self._postprocess_rpn(
            rpn_box_encodings,
            rpn_objectness_predictions_with_background,
            anchors,
            image_shape)

        extractor = self._feature_extractor
        classifier_features = extractor.extract_box_classifier_features(
            rpn_features, scope=self.second_stage_feature_extractor_scope)

        predictor_outputs = self._rfcn_box_predictor.predict(
            classifier_features,
            num_predictions_per_location=1,
            scope=self.second_stage_box_predictor_scope,
            proposal_boxes=normalized_proposals)

        # Drop axis 1 from both predictor outputs.
        raw_encodings = predictor_outputs[box_predictor.BOX_ENCODINGS]
        box_encodings = tf.squeeze(raw_encodings, axis=1)
        raw_class_logits = predictor_outputs[
            box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
        class_logits = tf.squeeze(raw_class_logits, axis=1)

        absolute_proposals = ops.normalized_to_image_coordinates(
            normalized_proposals,
            image_shape,
            parallel_iterations=self._parallel_iterations)

        return {
            'refined_box_encodings': box_encodings,
            'class_predictions_with_background': class_logits,
            'num_proposals': num_proposals,
            'proposal_boxes': absolute_proposals,
            'box_classifier_features': classifier_features,
            'proposal_boxes_normalized': normalized_proposals,
        }
Exemplo n.º 5
0
	        'Extremely hard to get through!!!!!!'

	b: Classification (second stage)
	  1. _predict_second_stage
	  	I: flattened_proposal_feature_maps = self._postprocess_rpn()
      'Very complicated function!!!!!!'
        i: self._format_groundtruth_data(): 
        ii: decoded_boxes = self._box_coder.decode(rpn_box_encodings, box_list.BoxList(anchors))
                            --> faster_rcnn_box_coder.FasterRcnnBoxCoder._decode()
            objectness_scores = tf.nn.softmax(rpn_objectness_predictions_with_background)
        iii:proposal_boxlist = post_processing.multiclass_non_max_suppression()
        iv: padded_proposals = box_list_ops.pad_or_clip_box_list()
	  	II: self._compute_second_stage_input_feature_maps()
	  	III: box_classifier_features = self._feature_extractor.extract_box_classifier_features()
	  	IV: box_predictions = self._mask_rcnn_box_predictor.predict()
	  	V: absolute_proposal_boxes = ops.normalized_to_image_coordinates()

B: losses_dict = detection_model.loss
	a. _loss_rpn
	  1. target_assigner.batch_assign_targets()
      I: target_assigner.assign()
        i: match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,anchors)
                -->sim_calc.IouSimilarity()-->box_list_ops.iou()
        ii: match = self._matcher.match(match_quality_matrix, **params)
                -->argmax_matcher.ArgMaxMatcher._match()
        iii: reg_targets = self._create_regression_targets(anchors,groundtruth_boxes,match)
        iv: cls_targets = self._create_classification_targets(groundtruth_labels,match)
        v: reg_weights = self._create_regression_weights(match)
        vi: cls_weights = self._create_classification_weights(
                        match, self._positive_class_weight, self._negative_class_weight)
    2. localization_losses = self._first_stage_localization_loss
  def _predict_second_stage(self, rpn_box_encodings,
                            rpn_objectness_predictions_with_background,
                            rpn_features,
                            anchors,
                            image_shape):
    """Builds the second-stage prediction tensors with the R-FCN predictor.

    Args:
      rpn_box_encodings: float tensor of shape
        [batch_size, num_valid_anchors, self._box_coder.code_size] holding the
        boxes predicted by the RPN.
      rpn_objectness_predictions_with_background: float tensor of shape
        [batch_size, num_valid_anchors, 2] holding per-anchor class logits.
        Index 0 of the last dimension is the background class.
      rpn_features: 4-D float32 tensor of shape
        [batch_size, height, width, depth] with the RPN image features.
      anchors: 2-D float tensor of shape
        [num_anchors, self._box_coder.code_size].
      image_shape: 1-D int32 tensor of size [4] with the image shape.

    Returns:
      A dictionary of "raw" prediction tensors with these keys:
        refined_box_encodings: tensor of shape
          [total_num_proposals, num_classes, 4] with the final refined box
          encodings, where
          total_num_proposals=batch_size*self._max_num_proposals.
        class_predictions_with_background: tensor of shape
          [total_num_proposals, num_classes + 1] with class logits; index 0
          is the background class.
        num_proposals: int32 tensor of shape [batch_size] with the number of
          proposals produced by the RPN for each image. It distinguishes real
          entries from zero padding, since proposals are always padded to
          `self.max_num_proposals` per image.
        proposal_boxes: float32 tensor of shape
          [batch_size, self.max_num_proposals, 4] with decoded proposal boxes
          in absolute coordinates.
    """
    normalized_proposals, _, num_proposals = self._postprocess_rpn(
        rpn_box_encodings,
        rpn_objectness_predictions_with_background,
        anchors,
        image_shape)

    extractor = self._feature_extractor
    classifier_features = extractor.extract_box_classifier_features(
        rpn_features, scope=self.second_stage_feature_extractor_scope)

    predictor_outputs = self._rfcn_box_predictor.predict(
        classifier_features,
        num_predictions_per_location=1,
        scope=self.second_stage_box_predictor_scope,
        proposal_boxes=normalized_proposals)

    # Drop axis 1 from both predictor outputs.
    raw_encodings = predictor_outputs[box_predictor.BOX_ENCODINGS]
    box_encodings = tf.squeeze(raw_encodings, axis=1)
    raw_class_logits = predictor_outputs[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
    class_logits = tf.squeeze(raw_class_logits, axis=1)

    absolute_proposals = ops.normalized_to_image_coordinates(
        normalized_proposals, image_shape,
        parallel_iterations=self._parallel_iterations)

    return {
        'refined_box_encodings': box_encodings,
        'class_predictions_with_background': class_logits,
        'num_proposals': num_proposals,
        'proposal_boxes': absolute_proposals,
    }
Exemplo n.º 7
0
    def _predict_second_stage(self, rpn_box_encodings,
                              rpn_objectness_predictions_with_background,
                              rpn_features, anchors, image_shape,
                              true_image_shapes):
        """Builds the second-stage (R-FCN) prediction tensors from RPN outputs.

        Args:
          rpn_box_encodings: float tensor of shape
            [batch_size, num_valid_anchors, self._box_coder.code_size] holding
            the boxes predicted by the RPN.
          rpn_objectness_predictions_with_background: float tensor of shape
            [batch_size, num_valid_anchors, 2] holding per-anchor class
            logits. Index 0 of the last dimension is the background class.
          rpn_features: a list holding a single 4-D float32 tensor of shape
            [batch_size, height, width, depth] with the RPN image features.
            Only element 0 is used.
          anchors: 2-D float tensor of shape
            [num_anchors, self._box_coder.code_size].
          image_shape: 1-D int32 tensor of size [4] with the image shape.
          true_image_shapes: int32 tensor of shape [batch, 3]; each row is
            [height, width, channels] of the true image inside the (possibly
            zero-padded) resized image.

        Returns:
          A dictionary of "raw" prediction tensors with these keys:
            refined_box_encodings: tensor of shape
              [total_num_proposals, num_classes, 4] with the final refined
              box encodings, where
              total_num_proposals=batch_size*self._max_num_proposals.
            class_predictions_with_background: tensor of shape
              [total_num_proposals, num_classes + 1] with class logits;
              index 0 is the background class.
            num_proposals: int32 tensor of shape [batch_size] with the number
              of proposals produced by the RPN for each image. It
              distinguishes real entries from zero padding, since proposals
              are always padded to `self.max_num_proposals` per image.
            proposal_boxes: float32 tensor of shape
              [batch_size, self.max_num_proposals, 4] with decoded proposal
              boxes in absolute coordinates.
            box_classifier_features: 4-D float32 tensor of shape
              [batch_size, feature_map_height, feature_map_width, depth].
            proposal_boxes_normalized: float32 tensor of shape
              [batch_size, self.max_num_proposals, 4] with decoded proposal
              boxes in normalized coordinates.
            final_anchors: the same tensor as `proposal_boxes`.
          When `self._return_raw_detections_during_predict` is set, the
          entries returned by `self._raw_detections_and_feature_map_inds`
          are merged into the dictionary as well.
        """
        # Replicate the per-image [height, width, channels] row once per
        # batch entry so the RPN post-processing gets one row per image.
        per_image_shape = tf.expand_dims(image_shape[1:], 0)
        batch_size = image_shape[0]
        image_shape_2d = tf.tile(per_image_shape, [batch_size, 1])

        # Only the normalized proposals and their per-image counts are used
        # from the six values returned here.
        (normalized_proposals, _, _, num_proposals, _,
         _) = self._postprocess_rpn(
             rpn_box_encodings, rpn_objectness_predictions_with_background,
             anchors, image_shape_2d, true_image_shapes)

        single_level_features = rpn_features[0]
        classifier_features = self._extract_box_classifier_features(
            single_level_features)

        # Keras predictors are invoked as callables; the others go through
        # `predict` with an explicit variable scope.
        predictor = self._rfcn_box_predictor
        if predictor.is_keras_model:
            predictor_outputs = predictor(
                [classifier_features],
                proposal_boxes=normalized_proposals)
        else:
            predictor_outputs = predictor.predict(
                [classifier_features],
                num_predictions_per_location=[1],
                scope=self.second_stage_box_predictor_scope,
                proposal_boxes=normalized_proposals)

        # Each predictor output is a list of tensors: join along axis 1,
        # then drop that axis.
        joined_encodings = tf.concat(
            predictor_outputs[box_predictor.BOX_ENCODINGS], axis=1)
        box_encodings = tf.squeeze(joined_encodings, axis=1)
        joined_class_logits = tf.concat(
            predictor_outputs[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
            axis=1)
        class_logits = tf.squeeze(joined_class_logits, axis=1)

        absolute_proposals = ops.normalized_to_image_coordinates(
            normalized_proposals,
            image_shape,
            parallel_iterations=self._parallel_iterations)

        predictions = {
            'refined_box_encodings': box_encodings,
            'class_predictions_with_background': class_logits,
            'num_proposals': num_proposals,
            'proposal_boxes': absolute_proposals,
            'box_classifier_features': classifier_features,
            'proposal_boxes_normalized': normalized_proposals,
            'final_anchors': absolute_proposals,
        }
        if self._return_raw_detections_during_predict:
            raw_detection_entries = self._raw_detections_and_feature_map_inds(
                box_encodings, absolute_proposals)
            predictions.update(raw_detection_entries)
        return predictions
Exemplo n.º 8
0
    def _box_prediction(self, rpn_features_to_crop, proposal_boxes_normalized,
                        image_shape, true_image_shapes, num_proposals,
                        **side_inputs):
        """Builds the second-stage Faster R-CNN box prediction tensors.

        Args:
          rpn_features_to_crop: a list of 4-D float32 or bfloat16 tensors of
            shape [batch_size, height_i, width_i, depth] with the image
            features to crop using the RPN proposal boxes.
          proposal_boxes_normalized: float tensor of shape
            [batch_size, max_num_proposals, 4] with the (potentially zero
            padded) proposal boxes for every image in the batch, in
            normalized coordinates.
          image_shape: 1-D int32 tensor of size [4] with the image shape.
          true_image_shapes: int32 tensor of shape [batch, 3]; each row is
            [height, width, channels] of the true image inside the (possibly
            zero-padded) resized image.
          num_proposals: the number of valid box proposals.
          **side_inputs: additional tensors required by the network; they are
            forwarded to the cropping and feature-extraction helpers.

        Returns:
          A dictionary of "raw" prediction tensors with these keys:
            refined_box_encodings: float32 tensor of shape
              [total_num_proposals, num_classes, self._box_coder.code_size]
              with the final refined box encodings, where
              total_num_proposals=batch_size*self._max_num_proposals. With a
              box shared across classes the shape is instead
              [total_num_proposals, 1, self._box_coder.code_size].
            class_predictions_with_background: float32 tensor of shape
              [total_num_proposals, num_classes + 1] with class logits;
              index 0 is the background class.
            proposal_boxes: float32 tensor of shape
              [batch_size, self.max_num_proposals, 4] with the proposal
              boxes in absolute coordinates.
            box_classifier_features: float32/bfloat16 tensor with the
              features for each proposal.
            proposal_boxes_normalized: the `proposal_boxes_normalized`
              argument, returned unchanged.
            final_anchors: the same tensor as `proposal_boxes_normalized`.
          When `self._return_raw_detections_during_predict` is set, the
          entries returned by `self._raw_detections_and_feature_map_inds`
          are merged into the dictionary as well.
        """
        cropped_feature_maps = self._compute_second_stage_input_feature_maps(
            rpn_features_to_crop, proposal_boxes_normalized, image_shape,
            num_proposals, **side_inputs)

        classifier_features = self._extract_box_classifier_features(
            cropped_feature_maps, num_proposals, **side_inputs)

        # Keras predictors are invoked as callables; the others go through
        # `predict` with an explicit variable scope.
        predictor = self._mask_rcnn_box_predictor
        if predictor.is_keras_model:
            predictor_outputs = predictor(
                [classifier_features], prediction_stage=2)
        else:
            predictor_outputs = predictor.predict(
                [classifier_features],
                num_predictions_per_location=[1],
                scope=self.second_stage_box_predictor_scope,
                prediction_stage=2)

        # Drop axis 1 from both predictor outputs; the op names are part of
        # the graph and are kept as-is.
        raw_encodings = predictor_outputs[box_predictor.BOX_ENCODINGS]
        box_encodings = tf.squeeze(
            raw_encodings, axis=1, name='all_refined_box_encodings')
        raw_class_logits = predictor_outputs[
            box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
        class_logits = tf.squeeze(
            raw_class_logits,
            axis=1,
            name='all_class_predictions_with_background')

        absolute_proposals = ops.normalized_to_image_coordinates(
            proposal_boxes_normalized, image_shape, self._parallel_iterations)

        # Only the copies stored in the dictionary are cast to float32; the
        # raw-detection helper below still receives the uncast encodings.
        box_encodings_f32 = tf.cast(box_encodings, dtype=tf.float32)
        class_logits_f32 = tf.cast(class_logits, dtype=tf.float32)

        predictions = {
            'refined_box_encodings': box_encodings_f32,
            'class_predictions_with_background': class_logits_f32,
            'proposal_boxes': absolute_proposals,
            'box_classifier_features': classifier_features,
            'proposal_boxes_normalized': proposal_boxes_normalized,
            'final_anchors': proposal_boxes_normalized,
        }

        if self._return_raw_detections_during_predict:
            raw_detection_entries = self._raw_detections_and_feature_map_inds(
                box_encodings, absolute_proposals, true_image_shapes)
            predictions.update(raw_detection_entries)

        return predictions