Пример #1
0
    def __getitem__(self, idx):
        '''Load the image, its bboxes and its masks for the given index.

        Args
        ---
            idx: the index of images.

        Returns
        ---
            tuple: A tuple containing the following items: image,
                image meta, bboxes, labels, masks, global mask.

        Raises
        ---
            OSError: if the image file cannot be read by OpenCV.
        '''
        img_info = self.img_infos[idx]
        ann_info = self._load_ann_info(idx)

        # Load the image.
        img_path = f"{self.image_dir}/{img_info['file_name']}"
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None rather than
            # raising; fail here with the path instead of letting
            # cv2.cvtColor raise an opaque error on the empty input.
            raise OSError(f"Failed to read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ori_shape = img.shape

        # Load the annotation.
        ann = self._parse_ann_info(ann_info)
        bboxes = ann['bboxes']
        labels = ann['labels']
        segments = ann['segments']  # list
        masks, global_mask = self._segments_to_mask(segments, labels,
                                                    ori_shape[:2])

        # Flip with probability ``self.flip_ratio``.
        flip = bool(np.random.rand() < self.flip_ratio)

        # Handle the image; the masks go through the same transform.
        img, img_shape, scale_factor, masks, global_mask = self.img_transform(
            img, masks, global_mask, flip)

        # Resize masks to smaller size to reduce memory usage.
        # NOTE(review): `bboxes` here are still the untransformed annotation
        # boxes while `masks` have already passed through img_transform --
        # confirm that minimize_mask expects this combination.
        if self.config.USE_MINI_MASK and not self.debug:
            masks = self.minimize_mask(bboxes,
                                       masks,
                                       self.config.MINI_MASK_SHAPE,
                                       softmask=self.config.SOFT_MASK)
        pad_shape = img.shape

        # Handle the annotation.
        bboxes, labels = self.bbox_transform(bboxes, labels, img_shape,
                                             scale_factor, flip)

        # Handle the meta info.
        img_meta_dict = {
            'ori_shape': ori_shape,
            'img_shape': img_shape,
            'pad_shape': pad_shape,
            'scale_factor': scale_factor,
            'flip': flip,
        }
        img_meta = utils.compose_image_meta(img_meta_dict)

        return img, img_meta, bboxes, labels, masks, global_mask
Пример #2
0
    def __getitem__(self, idx):
        '''Load the image and its bboxes for the given index.

        Args
        ---
            idx: the index of images.

        Returns
        ---
            tuple: A tuple containing the following items: image,
                image meta, bboxes, labels.

        Raises
        ---
            OSError: if the image file cannot be read by OpenCV.
        '''
        img_info = self.img_infos[idx]
        ann_info = self._load_ann_info(idx)

        # Load the image.
        img_path = osp.join(self.image_dir, img_info['file_name'])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None rather than
            # raising; fail here with the path instead of letting
            # cv2.cvtColor raise an opaque error on the empty input.
            raise OSError('Failed to read image: {}'.format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        ori_shape = img.shape

        # Load the annotation.
        ann = self._parse_ann_info(ann_info)
        bboxes = ann['bboxes']
        labels = ann['labels']

        # Flip with probability ``self.flip_ratio``.
        flip = bool(np.random.rand() < self.flip_ratio)

        # Handle the image.
        img, img_shape, scale_factor = self.img_transform(img, flip)

        pad_shape = img.shape

        # Handle the annotation.
        bboxes, labels = self.bbox_transform(bboxes, labels, img_shape,
                                             scale_factor, flip)

        # Handle the meta info.
        img_meta_dict = {
            'ori_shape': ori_shape,
            'img_shape': img_shape,
            'pad_shape': pad_shape,
            'scale_factor': scale_factor,
            'flip': flip,
        }
        img_meta = utils.compose_image_meta(img_meta_dict)

        return img, img_meta, bboxes, labels
Пример #3
0
    def __getitem__(self, idx):
        """Return the image and its annotations for the given index.

        Args
        ---
            idx: index of the image.

        Returns
        ---
            img: [height, width, channels]
            img_meta: [11]
            bboxes: [num_boxes, 4]
            labels: [num_boxes]

        Raises
        ---
            OSError: if the image file cannot be read by OpenCV.
        """
        # img_info: dict holding the COCO-format image info.
        # ann_info: list of dicts holding the COCO-format annotation info.
        img_info = self.img_infos[idx]
        ann_info = self._load_ann_info(idx)

        # cv2 loads the image as BGR; convert it to RGB.
        img_path = osp.join(self.image_dir, img_info['file_name'])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None rather than
            # raising; fail here with the path instead of letting
            # cv2.cvtColor raise an opaque error on the empty input.
            raise OSError('Failed to read image: {}'.format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        ori_shape = img.shape

        # ann: dict whose keys are bboxes, labels, bboxes_ignore.
        ann = self._parse_ann_info(ann_info)
        bboxes = ann['bboxes']
        labels = ann['labels']

        # Randomly decide whether to flip, with probability flip_ratio.
        flip = bool(np.random.rand() < self.flip_ratio)

        # Preprocess the image and the bounding boxes.
        img, img_shape, scale_factor = self.img_transform(img, flip)
        bboxes = self.bbox_transform(bboxes, img_shape, scale_factor, flip)

        # Assemble the image meta info.
        img_meta_dict = {
            'ori_shape': ori_shape,  # original image shape
            'img_shape': img_shape,  # image shape after resizing
            'pad_shape': img.shape,  # image shape after resizing + padding
            # Scale factor: resized size = int(original size * scale_factor + 0.5)
            'scale_factor': scale_factor,
            'flip': flip,  # whether the image was flipped left-right
        }
        img_meta = utils.compose_image_meta(img_meta_dict)

        return img, img_meta, bboxes, labels
Пример #4
0
        source_class_ids = self.source_class_ids[self.image_info[image_id]["source"]]
        active_class_ids[source_class_ids] = 1

        # Resize masks to smaller size to reduce memory usage
        if use_mini_mask:
            mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

        # Image meta data
        img_meta_dict = dict({
            'ori_shape': original_shape,
            'img_shape': ,
            'pad_shape': image.shape,
            'scale_factor': scale,
            'flip': flip
        })
        img_meta = utils.compose_image_meta(img_meta_dict)
        image_meta = compose_image_meta(image_id, original_shape, image.shape,
                                        window, scale, active_class_ids)

        return image, image_meta, class_ids, bbox, mask   

    # def compose_image_meta(image_id, original_image_shape, image_shape,
    #                         window, scale, active_class_ids):
    #     """Takes attributes of an image and puts them in one 1D array.

    #     image_id: An int ID of the image. Useful for debugging.
    #     original_image_shape: [H, W, C] before resizing or padding.
    #     image_shape: [H, W, C] after resizing and padding
    #     window: (y1, x1, y2, x2) in pixels. The area of the image where the real
    #             image is (excluding the padding)
    #     scale: The scaling factor applied to the original image (float32)