Exemplo n.º 1
0
    def __create_label(self, bboxes):
        """
        Build the per-scale training targets for one image.

        Each ground-truth box is divided by each of the three strides and compared
        (through ``utils.iou_calc2``) with that scale's anchors, centred on the grid
        cell that contains the box centre. On every scale, each anchor whose IOU with
        the box exceeds 0.3 is marked as a positive for that box. If no anchor on any
        scale passes the threshold, the single anchor with the highest IOU across all
        scales is marked instead, so every box produces at least one positive.

        Index 0 / 1 / 2 of the internal lists corresponds to the small / medium / big
        detector, i.e. to the ``s`` / ``m`` / ``l`` return values.

        :param bboxes: boxes of one image, each row laid out as
            (xmin, ymin, xmax, ymax, class_ind). Rows must support NumPy slicing
            arithmetic (``bbox[2:] + bbox[:2]``).
        :return: tuple (label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes)
            label_*bbox: array of shape
                (train_output_size, train_output_size, anchor_per_scale, 5 + num_classes).
                Positive slots hold (x, y, w, h, 1, one-hot classes) with (x, y, w, h)
                in the un-strided coordinates of ``bboxes``; all other slots are zero.
                The original notes describe the three output sizes as
                input_size / 8, / 16 and / 32 - confirm against the class setup.
            *bboxes: array of shape (max_bbox_per_scale, 4) with the (x, y, w, h) of the
                boxes assigned to that detector. Once more than max_bbox_per_scale
                boxes are assigned, the oldest entries are overwritten.
        """
        label = [
            np.zeros((self.__train_output_sizes[i], self.__train_output_sizes[i],
                      self.__anchor_per_scale, 5 + self.__num_classes))
            for i in range(3)
        ]
        bboxes_xywh = [np.zeros((self.__max_bbox_per_scale, 4)) for _ in range(3)]
        bbox_count = np.zeros((3,))

        def mark_positive(scale, anchor_sel, xind, yind, bbox_xywh, onehot):
            # Write one box into the label tensor of `scale` and record it in that
            # scale's bbox list. `anchor_sel` is a boolean mask or a single index.
            # The slot is cleared first because an anchor can be selected by several
            # boxes; without clearing, it could end up tagged with several classes.
            label[scale][yind, xind, anchor_sel, :] = 0
            label[scale][yind, xind, anchor_sel, 0:4] = bbox_xywh
            label[scale][yind, xind, anchor_sel, 4:5] = 1.0
            label[scale][yind, xind, anchor_sel, 5:] = onehot

            # The modulo makes the bbox list wrap around instead of overflowing.
            bbox_ind = int(bbox_count[scale] % self.__max_bbox_per_scale)
            bboxes_xywh[scale][bbox_ind, :4] = bbox_xywh
            bbox_count[scale] += 1

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            # int() keeps the index valid even if the bbox row is float-typed.
            bbox_class_ind = int(bbox[4])

            # Hard one-hot class target. `float` replaces the `np.float` alias that
            # was removed from NumPy. NOTE(review): the previous revision also built
            # a label-smoothed vector here but never stored it; that dead code was
            # dropped and the written labels are unchanged.
            onehot = np.zeros(self.__num_classes, dtype=float)
            onehot[bbox_class_ind] = 1.0

            # (1) (xmin, ymin, xmax, ymax) -> (x_center, y_center, w, h)
            bbox_xywh = np.concatenate(
                [(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]],
                axis=-1)

            # (2) Divide by each stride to express the box in every detector's grid
            # units; row i belongs to scale i.
            bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.__strides[:, np.newaxis]

            # (3) IOU between the box and the anchors of every scale.
            iou = []
            exist_positive = False
            for i in range(3):
                # Grid cell containing the box centre; anchors sit at the cell centre.
                cell_xy = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)
                anchors_xywh = np.zeros((self.__anchor_per_scale, 4))
                anchors_xywh[:, 0:2] = cell_xy + 0.5
                anchors_xywh[:, 2:4] = self.__anchors[i]

                # Presumably one IOU value per anchor - the result is used below as a
                # mask over the anchor axis.
                iou_scale = utils.iou_calc2(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    # (4) Every anchor above the threshold becomes a positive.
                    xind, yind = cell_xy
                    mark_positive(i, iou_mask, xind, yind, bbox_xywh, onehot)
                    exist_positive = True

            if not exist_positive:
                # Fallback: no anchor passed the threshold, so use the best anchor
                # overall. The flat argmax index decomposes into (scale, anchor).
                best_anchor_ind = int(np.argmax(np.array(iou).reshape(-1), axis=-1))
                best_detect, best_anchor = divmod(best_anchor_ind, self.__anchor_per_scale)
                best_detect = int(best_detect)
                best_anchor = int(best_anchor)
                xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
                mark_positive(best_detect, best_anchor, xind, yind, bbox_xywh, onehot)

        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh
        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
Exemplo n.º 2
0
    def __create_label(self, bboxes):
        """
        Build the per-scale training targets for one image.

        Each ground-truth box is divided by each of the three strides and compared
        (through ``utils.iou_calc2``) with that scale's anchors, centred on the grid
        cell that contains the box centre. The anchor with the highest IOU across all
        scales is the "best anchor"; the detector owning it is responsible for the box.

        Index 0 / 1 / 2 of the internal lists corresponds to the small / medium / big
        detector, i.e. to the ``s`` / ``m`` / ``l`` return values.

        :param bboxes: boxes of one image, each row laid out as
            (xmin, ymin, xmax, ymax, class_ind). Rows must support NumPy slicing
            arithmetic (``bbox[2:] + bbox[:2]``).
        :return: tuple (label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes)
            label_*bbox: array of shape
                (train_output_size, train_output_size, anchor_per_scale, 5 + num_classes).
                Only best-anchor slots hold (x, y, w, h, 1, one-hot class) with
                (x, y, w, h) in the un-strided coordinates of ``bboxes``; all other
                slots are zero. The original notes describe the three output sizes as
                input_size / 8, / 16 and / 32 - confirm against the class setup.
            *bboxes: array of shape (max_bbox_per_scale, 4) with the (x, y, w, h) of the
                boxes assigned to that detector. Once more than max_bbox_per_scale
                boxes are assigned, the oldest entries are overwritten.
        """
        label = [np.zeros((self.__train_output_sizes[i], self.__train_output_sizes[i], self.__anchor_per_scale,
                           5 + self.__num_classes)) for i in range(3)]
        bboxes_xywh = [np.zeros((self.__max_bbox_per_scale, 4)) for _ in range(3)]
        bbox_count = np.zeros((3,))

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            # int() keeps `5 + bbox_class_ind` a valid index even if the bbox row is
            # float-typed.
            bbox_class_ind = int(bbox[4])

            # (1) (xmin, ymin, xmax, ymax) -> (x_center, y_center, w, h)
            bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)

            # (2) Divide by each stride to express the box in every detector's grid
            # units; row i belongs to scale i.
            bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.__strides[:, np.newaxis]

            # (3) IOU between the box and the anchors of every scale, then pick the
            # anchor with the highest IOU overall.
            iou = []
            for i in range(3):
                anchors_xywh = np.zeros((self.__anchor_per_scale, 4))
                # Anchors are centred on the grid cell that contains the box centre.
                anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                anchors_xywh[:, 2:4] = self.__anchors[i]
                iou.append(utils.iou_calc2(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh))
            # The flat argmax index decomposes into (scale, anchor within scale).
            best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
            best_detect = int(best_anchor_ind / self.__anchor_per_scale)
            best_anchor = int(best_anchor_ind % self.__anchor_per_scale)
            xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)

            # (4) Mark the best-anchor slot as (x, y, w, h, 1, one-hot class).
            # Clear the slot first: the same (cell, anchor) can be the best anchor of
            # several boxes, and without clearing, the class bit of an earlier box
            # would survive next to the new one, producing a multi-class label.
            label[best_detect][yind, xind, best_anchor, :] = 0
            label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
            label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
            label[best_detect][yind, xind, best_anchor, 5 + bbox_class_ind] = 1.0

            # The modulo makes the bbox list wrap around instead of overflowing.
            bbox_ind = int(bbox_count[best_detect] % self.__max_bbox_per_scale)
            bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
            bbox_count[best_detect] += 1
        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh
        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes