Example #1
    def testBasic(self):
        """
        Test basic output of the FasterCnnNetwork
        """
        results = self._run_network()
        class_prediction = results['classification_prediction']
        rpn_prediction = results['rpn_prediction']

        # Check that every object is defined by 4 coordinates
        self.assertEqual(
            class_prediction['objects'].shape[1],
            4
        )

        # Check we get objects clipped to the image.
        self.assertAllEqual(
            clip_boxes(class_prediction['objects'], self.image_size),
            class_prediction['objects']
        )

        self.assertEqual(
            class_prediction['labels'].shape[0],
            class_prediction['objects'].shape[0]
        )

        # Check that every object label is less than or equal to
        # 'num_classes'
        self.assertTrue(
            np.less_equal(class_prediction['labels'],
                          self.config.model.network.num_classes).all()
        )

        # Check that the sum of class probabilities is 1
        self.assertAllClose(
            np.sum(class_prediction['rcnn']['cls_prob'], axis=1),
            np.ones((class_prediction['rcnn']['cls_prob'].shape[0]))
        )

        # Check that the sum of rpn class probabilities is 1
        self.assertAllClose(
            np.sum(rpn_prediction['rpn_cls_prob'], axis=1),
            np.ones((rpn_prediction['rpn_cls_prob'].shape[0]))
        )

        # Check that every rpn proposal has 4 coordinates
        self.assertEqual(
            rpn_prediction['proposals'].shape[1],
            4
        )

        # Check we get rpn proposals clipped to the image.
        self.assertAllEqual(
            clip_boxes(rpn_prediction['proposals'], self.image_size),
            rpn_prediction['proposals']
        )
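
Both this test and the anchors test below lean on the invariant that clip_boxes(boxes, image_size) leaves already-clipped boxes unchanged. The listing does not include clip_boxes itself, so here is a minimal NumPy sketch of its assumed behavior (clamping (x1, y1, x2, y2) boxes into the image bounds); the project's real version operates on tensors:

import numpy as np

def clip_boxes_np(boxes, image_shape):
    # Hypothetical stand-in for the project's `clip_boxes`. Assumes boxes
    # is an (N, 4) array of (x1, y1, x2, y2) and image_shape is
    # (height, width).
    height, width = image_shape[0], image_shape[1]
    clipped = boxes.astype(float)
    clipped[:, 0] = np.clip(boxes[:, 0], 0, width - 1)   # x1
    clipped[:, 1] = np.clip(boxes[:, 1], 0, height - 1)  # y1
    clipped[:, 2] = np.clip(boxes[:, 2], 0, width - 1)   # x2
    clipped[:, 3] = np.clip(boxes[:, 3], 0, height - 1)  # y2
    return clipped

Clipping is idempotent, so asserting clip_boxes(x) == x is exactly a check that every box already lies inside the image.
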
Example #3
    def testAnchors(self):
        """
        Tests for the anchors generated by the FasterRCNN
        """
        results = self._run_network()

        # Check we get anchors clipped to the image.
        self.assertAllEqual(
            clip_boxes(results['all_anchors'], self.image_size),
            results['all_anchors']
        )

        feature_map = np.random.randint(low=0, high=255, size=(1, 32, 32, 1))
        config = self.config
        config.model.anchors.base_size = 16
        config.model.anchors.scales = [0.5, 1, 2]
        config.model.anchors.ratios = [0.5, 1, 2]
        config.model.anchors.stride = 1  # image is 32 x 32

        anchors = self._gen_anchors(config, feature_map.shape)

        # Check that the number of anchors generated is correct:
        # 9216 = 32^2 * len(scales) * len(ratios) = 1024 * 9
        self.assertEqual(anchors.shape, (9216, 4))

        anchor_widths = anchors[:, 2] - anchors[:, 0]
        anchor_heights = anchors[:, 3] - anchors[:, 1]

        # Since we are using equal scales and ratios, the set of unique heights
        # and widths must be the same.
        self.assertAllEqual(
            np.unique(anchor_widths), np.unique(anchor_heights)
        )

        anchor_areas = anchor_widths * anchor_heights

        # We have 9 possible anchor areas, minus 3 repeated ones: 6 unique.
        self.assertAllEqual(np.unique(anchor_areas).shape[0], 6)

        # Check that the anchors cover the whole image.
        # TODO: Check with values calculated from config.
        self.assertEqual(np.min(anchors[:, 0]), -22)
        self.assertEqual(np.max(anchors[:, 0]), 29)

        self.assertEqual(np.min(anchors[:, 1]), -22)
        self.assertEqual(np.max(anchors[:, 1]), 29)

        self.assertEqual(np.min(anchors[:, 2]), 2)
        self.assertEqual(np.max(anchors[:, 2]), 53)

        self.assertEqual(np.min(anchors[:, 3]), 2)
        self.assertEqual(np.max(anchors[:, 3]), 53)

        stride = config.model.anchors.stride
        # Check values are sequential.
        self._assert_sequential_values(anchors[:, 0], stride)
        self._assert_sequential_values(anchors[:, 1], stride)
        self._assert_sequential_values(anchors[:, 2], stride)
        self._assert_sequential_values(anchors[:, 3], stride)
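
The magic numbers above can be checked by hand. The sketch below rebuilds the 9 reference anchors under one common convention, corners at center ± (side - 1) / 2 (an assumption here, not shown in the listing), and counts the unique areas:

import numpy as np

base_size = 16
scales = np.array([0.5, 1, 2])
ratios = np.array([0.5, 1, 2])

# All 9 (scale, ratio) combinations; with stride 1 on a 32 x 32 feature
# map that is 32 * 32 * 9 = 9216 anchors in total.
scales_grid, ratios_grid = np.meshgrid(scales, ratios)
s = scales_grid.reshape(-1)
r = ratios_grid.reshape(-1)

heights = base_size * s * np.sqrt(r)
widths = base_size * s / np.sqrt(r)

# Measured corner-to-corner, each side loses one pixel: side -> side - 1.
areas = (widths - 1) * (heights - 1)
print(np.unique(areas.round(3)).size)  # 6

For each scale, ratios r and 1/r merely swap width and height and therefore share an area, while ratio 1 contributes a distinct one: 3 scales x 2 distinct areas = 6 unique values, matching the assertion in the test.
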
Example #5
    def _build(self, image, gt_boxes=None, is_training=False):
        """
        Returns bounding boxes and classification probabilities.

        Args:
            image: A tensor with the image.
                Its shape should be `(height, width, 3)`.
            gt_boxes: A tensor with all the ground truth boxes of that image.
                Its shape should be `(num_gt_boxes, 5)`
                Where for each gt box we have (x1, y1, x2, y2, label),
                in that order.
            is_training: A boolean indicating whether it is used for training.

        Returns:
            A dictionary with the following keys:
            predictions: the prediction results.
                proposal_prediction: information about the proposals.
                A dictionary with:
                    proposals: The proposals of the network after applying
                        some filters, like negative area, and NMS; i.e. the
                        proposals that are finally kept.
                    proposals_label: A tensor with the label assigned to
                        each proposal.
                    proposals_label_prob: A tensor with the softmax
                        probability for the label of each proposal.
                bbox_offsets: A tensor with the predicted bbox offsets.
                class_scores: A tensor with the predicted class scores
                    (confidences).
        """
        # Reshape image
        self.image_shape.append(3)  # Add channels to shape
        image.set_shape(self.image_shape)
        image = tf.expand_dims(image, 0, name='hardcode_batch_size_to_1')

        # Generate feature maps from image
        self.feature_extractor = SSDFeatureExtractor(
            self._config.base_network, parent_name=self.module_name)
        # Get the feature maps.
        # Question: do these feature maps come from several conv layers, or
        # just from the output of a single one?
        # Answer: they are all of the feature maps we need to work with.
        feature_maps = self.feature_extractor(image, is_training=is_training)

        # Build a MultiBox predictor on top of each feature layer and collect
        # the bounding box offsets and the category score logits they produce
        bbox_offsets_list = []
        class_scores_list = []
        # Iterate over the output feature maps and run a predictor on each.
        for i, feat_map in enumerate(feature_maps.values()):
            multibox_predictor_name = 'MultiBox_{}'.format(i)
            with tf.name_scope(multibox_predictor_name):
                # This makes the number of predictions match, one to one,
                # the number of anchors generated later.
                num_anchors = self._anchors_per_point[i]

                # Predict bbox offsets
                # Predict the coordinate offsets with a 3x3 convolution.
                bbox_offsets_layer = Conv2D(num_anchors * 4, [3, 3],
                                            name=multibox_predictor_name +
                                            '_offsets_conv')(feat_map)

                # (H*W*num_anchors, 4), where H and W match the height and
                # width of the feature map above.
                bbox_offsets_flattened = tf.reshape(bbox_offsets_layer,
                                                    [-1, 4])
                # Collect the offsets of all the predicted boxes.
                bbox_offsets_list.append(bbox_offsets_flattened)

                # Predict class scores
                # Predict the classes (including the background class)
                # with a 3x3 convolution.
                class_scores_layer = Conv2D(
                    num_anchors * (self._num_classes + 1),
                    [3, 3],
                    name=multibox_predictor_name + '_classes_conv',
                )(feat_map)
                class_scores_flattened = tf.reshape(
                    class_scores_layer, [-1, self._num_classes + 1])
                # Collect the class predictions of all the predicted boxes.
                class_scores_list.append(class_scores_flattened)

        # Combine all the predicted bbox offsets and class scores, and
        # compute the corresponding softmax probabilities.
        # (num_bboxes, 4), relative to the anchors generated below.
        bbox_offsets = tf.concat(bbox_offsets_list,
                                 axis=0,
                                 name='concatenate_all_bbox_offsets')
        # (num_bboxes, 21): the corresponding scores.
        class_scores = tf.concat(class_scores_list,
                                 axis=0,
                                 name='concatenate_all_class_scores')
        # (num_bboxes, 21): the corresponding probabilities (also relative
        # to the anchors generated below).
        class_probabilities = tf.nn.softmax(class_scores,
                                            axis=-1,
                                            name='class_probabilities_softmax')

        # Unlike the predictions above, these anchors are reference boxes
        # generated from the feature maps.

        # Generate anchors (generated only once, therefore we use numpy)
        # Using the anchor parameters, generate all the anchors from each
        # conv layer's feature map (coordinates in feature-map space).
        raw_anchors_per_featmap = generate_raw_anchors(feature_maps,
                                                       self._anchor_min_scale,
                                                       self._anchor_max_scale,
                                                       self._anchor_ratios,
                                                       self._anchors_per_point)

        anchors_list = []
        # Iterate over all the feature maps, map the anchors to the
        # original image, and clip them.
        for i, (feat_map_name, feat_map) in enumerate(feature_maps.items()):
            # TODO: Anchor generation should be simpler. We should create
            #       them in image scale from the start instead of scaling
            #       them to their feature map size.
            # feat_map's shape should be (num_batch, height, width, channels).
            feat_map_shape = feat_map.shape.as_list()[1:3]
            # Map the anchors from the feature map to the original image
            # (coordinates in image space).
            scaled_bboxes = adjust_bboxes(
                raw_anchors_per_featmap[feat_map_name], feat_map_shape[0],
                feat_map_shape[1], self.image_shape[0], self.image_shape[1])
            clipped_bboxes = clip_boxes(scaled_bboxes, self.image_shape)
            anchors_list.append(clipped_bboxes)
        # Merge the image-space coordinates of all the anchors.
        anchors = np.concatenate(anchors_list, axis=0)
        anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)

        # This is the dict we'll return after filling it with SSD's results
        prediction_dict = {}

        # Generate targets for training
        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)

            # Generate targets
            target_creator = SSDTarget(self._num_classes, self._config.target,
                                       self._config.variances)
            # Returns the class label (0-21) for each anchor and, at the
            # positions of the foreground anchors, the offsets and scales of
            # the matched ground-truth box relative to the anchor itself
            # (zero everywhere else).
            # The class labels are assigned to:
            # 1. anchors whose maximum IoU over all ground-truth boxes is
            #    above the threshold, and
            # 2. the best anchor for each ground-truth box,
            # taken from the corresponding best-matching ground-truth box.
            # The positive-sample class_targets depend only on IoU, not on
            # the predictions; the predicted class probabilities are only
            # used for the background samples.
            class_targets, bbox_offsets_targets = target_creator(
                class_probabilities, anchors, gt_boxes)

            # Filter the predictions and targets that we will ignore during
            # training due to hard negative mining. We use class_targets to
            # know which ones to ignore (they are marked as -1 if they are to
            # be ignored)
            # Gather the data for the foreground anchors, including:
            #   the reference anchors themselves
            #   the offsets/scales of the ground-truth boxes relative to
            #       the anchors
            #   the class label of each anchor
            #   the predicted class scores for each anchor
            #   the predicted class probabilities for each anchor
            #   the predicted offsets/scales of the proposals relative to
            #       the anchors
            # Note: one set of offsets is predicted per anchor, in one-to-one
            # correspondence, and only the results for anchors whose class
            # label is >= 0 are actually kept.
            # boolean_mask keeps only the entries where the filter is True.
            with tf.name_scope('hard_negative_mining_filter'):
                predictions_filter = tf.greater_equal(class_targets, 0)
                anchors = tf.boolean_mask(anchors, predictions_filter)
                bbox_offsets_targets = tf.boolean_mask(bbox_offsets_targets,
                                                       predictions_filter)
                class_targets = tf.boolean_mask(class_targets,
                                                predictions_filter)
                class_scores = tf.boolean_mask(class_scores,
                                               predictions_filter)
                class_probabilities = tf.boolean_mask(class_probabilities,
                                                      predictions_filter)
                bbox_offsets = tf.boolean_mask(bbox_offsets,
                                               predictions_filter)

            # Add target tensors to prediction dict
            # The tensors that depend on the ground truth.
            prediction_dict['target'] = {
                'cls': class_targets,
                'bbox_offsets': bbox_offsets_targets,
                'anchors': anchors
            }

        # Add network's raw output to prediction dict
        # The tensors related to the raw predictions.
        prediction_dict['cls_pred'] = class_scores
        prediction_dict['loc_pred'] = bbox_offsets

        # At this point we have the adjusted predictions for all the anchors,
        # without any further processing. That is enough for training, but
        # for prediction or debug output the data still needs to be filtered.

        # We generate proposals when predicting, or when debug=True for
        # generating visualizations during training.
        # Everything reaching this point is an adjusted prediction and can be
        # seen as a candidate detection; what follows mostly selects among
        # these candidates.
        if not is_training or self._debug:
            proposals_creator = SSDProposal(self._num_classes,
                                            self._config.proposals,
                                            self._config.variances)
            proposals = proposals_creator(
                class_probabilities, bbox_offsets, anchors,
                tf.cast(tf.shape(image)[1:3], tf.float32))
            prediction_dict['classification_prediction'] = proposals

        # Add some non essential metrics for debugging
        if self._debug:
            prediction_dict['all_anchors'] = anchors
            prediction_dict['cls_prob'] = class_probabilities

        return prediction_dict
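
The "-1 means ignore" convention used by the hard-negative-mining filter above is easy to check in isolation. A toy sketch with hypothetical values (run with eager execution; these are not the network's actual tensors):

import tensorflow as tf

# Five anchors; -1 marks the ones hard negative mining wants ignored.
class_targets = tf.constant([-1, 0, 3, -1, 7])
bbox_offsets = tf.constant([[0.1, 0.2, 0.3, 0.4]] * 5)

predictions_filter = tf.greater_equal(class_targets, 0)
kept_targets = tf.boolean_mask(class_targets, predictions_filter)
kept_offsets = tf.boolean_mask(bbox_offsets, predictions_filter)
# kept_targets == [0, 3, 7]; kept_offsets.shape == (3, 4). Filtering every
# tensor with the same mask keeps them aligned anchor-by-anchor.
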
Example #6
    def _build(self, image, gt_boxes=None, is_training=False):
        """
        Returns bounding boxes and classification probabilities.

        Args:
            image: A tensor with the image.
                Its shape should be `(height, width, 3)`.
            gt_boxes: A tensor with all the ground truth boxes of that image.
                Its shape should be `(num_gt_boxes, 5)`
                Where for each gt box we have (x1, y1, x2, y2, label),
                in that order.
            is_training: A boolean indicating whether it is used for training.

        Returns:
            A dictionary with the following keys:
            predictions:
            proposal_prediction: A dictionary with:
                proposals: The proposals of the network after applying some
                    filters, like negative area, and NMS.
                proposals_label: A tensor with the label for each proposal.
                proposals_label_prob: A tensor with the softmax probability
                    for the label of each proposal.
            bbox_offsets: A tensor with the predicted bbox offsets.
            class_scores: A tensor with the predicted class scores.
        """
        # Reshape image
        self.image_shape.append(3)  # Add channels to shape
        image.set_shape(self.image_shape)
        image = tf.expand_dims(image, 0, name='hardcode_batch_size_to_1')

        # Generate feature maps from image
        self.feature_extractor = SSDFeatureExtractor(
            self._config.base_network, parent_name=self.module_name)
        feature_maps = self.feature_extractor(image, is_training=is_training)

        # Build a MultiBox predictor on top of each feature layer and collect
        # the bounding box offsets and the category score logits they produce
        bbox_offsets_list = []
        class_scores_list = []
        for i, feat_map in enumerate(feature_maps.values()):
            multibox_predictor_name = 'MultiBox_{}'.format(i)
            with tf.name_scope(multibox_predictor_name):
                num_anchors = self._anchors_per_point[i]

                # Predict bbox offsets
                bbox_offsets_layer = Conv2D(num_anchors * 4, [3, 3],
                                            name=multibox_predictor_name +
                                            '_offsets_conv')(feat_map)
                bbox_offsets_flattened = tf.reshape(bbox_offsets_layer,
                                                    [-1, 4])
                bbox_offsets_list.append(bbox_offsets_flattened)

                # Predict class scores
                class_scores_layer = Conv2D(
                    num_anchors * (self._num_classes + 1),
                    [3, 3],
                    name=multibox_predictor_name + '_classes_conv',
                )(feat_map)
                class_scores_flattened = tf.reshape(
                    class_scores_layer, [-1, self._num_classes + 1])
                class_scores_list.append(class_scores_flattened)
        bbox_offsets = tf.concat(bbox_offsets_list,
                                 axis=0,
                                 name='concatenate_all_bbox_offsets')
        class_scores = tf.concat(class_scores_list,
                                 axis=0,
                                 name='concatenate_all_class_scores')
        class_probabilities = tf.nn.softmax(class_scores,
                                            axis=-1,
                                            name='class_probabilities_softmax')

        # Generate anchors (generated only once, therefore we use numpy)
        raw_anchors_per_featmap = generate_raw_anchors(feature_maps,
                                                       self._anchor_min_scale,
                                                       self._anchor_max_scale,
                                                       self._anchor_ratios,
                                                       self._anchors_per_point)
        anchors_list = []
        for i, (feat_map_name, feat_map) in enumerate(feature_maps.items()):
            # TODO: Anchor generation should be simpler. We should create
            #       them in image scale from the start instead of scaling
            #       them to their feature map size.
            feat_map_shape = feat_map.shape.as_list()[1:3]
            scaled_bboxes = adjust_bboxes(
                raw_anchors_per_featmap[feat_map_name], feat_map_shape[0],
                feat_map_shape[1], self.image_shape[0], self.image_shape[1])
            clipped_bboxes = clip_boxes(scaled_bboxes, self.image_shape)
            anchors_list.append(clipped_bboxes)
        anchors = np.concatenate(anchors_list, axis=0)
        anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)

        # This is the dict we'll return after filling it with SSD's results
        prediction_dict = {}

        # Generate targets for training
        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)

            # Generate targets
            target_creator = SSDTarget(self._num_classes, self._config.target,
                                       self._config.variances)
            class_targets, bbox_offsets_targets = target_creator(
                class_probabilities, anchors, gt_boxes)

            # Filter the predictions and targets that we will ignore during
            # training due to hard negative mining. We use class_targets to
            # know which ones to ignore (they are marked as -1 if they are to
            # be ignored)
            with tf.name_scope('hard_negative_mining_filter'):
                predictions_filter = tf.greater_equal(class_targets, 0)

                anchors = tf.boolean_mask(anchors, predictions_filter)
                bbox_offsets_targets = tf.boolean_mask(bbox_offsets_targets,
                                                       predictions_filter)
                class_targets = tf.boolean_mask(class_targets,
                                                predictions_filter)
                class_scores = tf.boolean_mask(class_scores,
                                               predictions_filter)
                class_probabilities = tf.boolean_mask(class_probabilities,
                                                      predictions_filter)
                bbox_offsets = tf.boolean_mask(bbox_offsets,
                                               predictions_filter)

            # Add target tensors to prediction dict
            prediction_dict['target'] = {
                'cls': class_targets,
                'bbox_offsets': bbox_offsets_targets,
                'anchors': anchors
            }

        # Add network's raw output to prediction dict
        prediction_dict['cls_pred'] = class_scores
        prediction_dict['loc_pred'] = bbox_offsets

        # We generate proposals when predicting, or when debug=True for
        # generating visualizations during training.
        if not is_training or self._debug:
            proposals_creator = SSDProposal(self._num_classes,
                                            self._config.proposals,
                                            self._config.variances)
            proposals = proposals_creator(
                class_probabilities, bbox_offsets, anchors,
                tf.cast(tf.shape(image)[1:3], tf.float32))
            prediction_dict['classification_prediction'] = proposals

        # Add some non essential metrics for debugging
        if self._debug:
            prediction_dict['all_anchors'] = anchors
            prediction_dict['cls_prob'] = class_probabilities

        return prediction_dict
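
SSDProposal is where the predicted offsets finally get turned back into boxes, but its internals are not part of this listing. The sketch below shows the standard SSD decode with variances, which is roughly what the proposal step is assumed to perform; decode_boxes is a hypothetical helper name:

import numpy as np

def decode_boxes(anchors, offsets, variances=(0.1, 0.1, 0.2, 0.2)):
    # anchors: (N, 4) as (x1, y1, x2, y2); offsets: (N, 4) as
    # (tx, ty, tw, th) relative to each anchor. The variances and the
    # exact formula are assumptions based on the usual SSD formulation.
    aw = anchors[:, 2] - anchors[:, 0]
    ah = anchors[:, 3] - anchors[:, 1]
    acx = anchors[:, 0] + aw / 2
    acy = anchors[:, 1] + ah / 2

    # Offsets are scaled by the variances before being applied.
    cx = offsets[:, 0] * variances[0] * aw + acx
    cy = offsets[:, 1] * variances[1] * ah + acy
    w = np.exp(offsets[:, 2] * variances[2]) * aw
    h = np.exp(offsets[:, 3] * variances[3]) * ah

    return np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=1)

After decoding, the proposal step would still clip the boxes to the image, drop low-probability detections, and apply per-class NMS, as the `_config.proposals` settings suggest.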