def __getitem__(self, idx):
    '''Load the image, its bboxes, labels and masks for the given index.

    Args
    ---
        idx: the index of the image in ``self.img_infos``.

    Returns
    ---
        tuple: A tuple containing the following items:
            img, img_meta, bboxes, labels, masks, global_mask.
            ``img_meta`` is the value produced by
            ``utils.compose_image_meta`` from the original shape, the
            transformed shapes, the scale factor and the flip flag.

    Raises
    ---
        OSError: if OpenCV cannot read the image file.
    '''
    img_info = self.img_infos[idx]
    ann_info = self._load_ann_info(idx)

    # Load the image. cv2.imread signals failure by returning None instead
    # of raising, so check explicitly and report the offending path rather
    # than failing later inside cvtColor with an opaque error.
    img_path = f"{self.image_dir}/{img_info['file_name']}"
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        raise OSError(f"Failed to read image: {img_path}")
    # OpenCV decodes to BGR channel order; the rest of the pipeline gets RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ori_shape = img.shape

    # Load the annotation.
    ann = self._parse_ann_info(ann_info)
    bboxes = ann['bboxes']
    labels = ann['labels']
    segments = ann['segments']  # a list, per the original author's note

    # Masks are built against the original (height, width) of the image.
    masks, global_mask = self._segments_to_mask(segments, labels,
                                                ori_shape[:2])

    # Flip with probability ``self.flip_ratio``; bool() keeps a plain
    # Python bool in the meta dict.
    flip = bool(np.random.rand() < self.flip_ratio)

    # Handle the image (and the masks, which must follow the same transform).
    img, img_shape, scale_factor, masks, global_mask = self.img_transform(
        img, masks, global_mask, flip)

    # Resize masks to smaller size to reduce memory usage
    # (skipped in debug mode).
    if self.config.USE_MINI_MASK and not self.debug:
        masks = self.minimize_mask(bboxes, masks,
                                   self.config.MINI_MASK_SHAPE,
                                   softmask=self.config.SOFT_MASK)

    # Shape of the image as returned by img_transform.
    pad_shape = img.shape

    # Handle the annotation.
    bboxes, labels = self.bbox_transform(bboxes, labels, img_shape,
                                         scale_factor, flip)

    # Handle the meta info.
    img_meta_dict = {
        'ori_shape': ori_shape,
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale_factor': scale_factor,
        'flip': flip,
    }
    img_meta = utils.compose_image_meta(img_meta_dict)

    return img, img_meta, bboxes, labels, masks, global_mask
def __getitem__(self, idx):
    '''Load the image and its bboxes for the given index.

    Args
    ---
        idx: the index of the image in ``self.img_infos``.

    Returns
    ---
        tuple: A tuple containing the following items:
            img, img_meta, bboxes, labels.
            ``img_meta`` is the value produced by
            ``utils.compose_image_meta`` from the original shape, the
            transformed shapes, the scale factor and the flip flag.

    Raises
    ---
        OSError: if OpenCV cannot read the image file.
    '''
    img_info = self.img_infos[idx]
    ann_info = self._load_ann_info(idx)

    # Load the image. cv2.imread signals failure by returning None instead
    # of raising, so check explicitly and report the offending path rather
    # than failing later inside cvtColor with an opaque error.
    img_path = osp.join(self.image_dir, img_info['file_name'])
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    # OpenCV decodes to BGR channel order; the rest of the pipeline gets RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ori_shape = img.shape

    # Load the annotation.
    ann = self._parse_ann_info(ann_info)
    bboxes = ann['bboxes']
    labels = ann['labels']

    # Flip with probability ``self.flip_ratio``; bool() keeps a plain
    # Python bool in the meta dict.
    flip = bool(np.random.rand() < self.flip_ratio)

    # Handle the image.
    img, img_shape, scale_factor = self.img_transform(img, flip)

    # Shape of the image as returned by img_transform.
    pad_shape = img.shape

    # Handle the annotation.
    bboxes, labels = self.bbox_transform(bboxes, labels, img_shape,
                                         scale_factor, flip)

    # Handle the meta info.
    img_meta_dict = {
        'ori_shape': ori_shape,
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale_factor': scale_factor,
        'flip': flip,
    }
    img_meta = utils.compose_image_meta(img_meta_dict)

    return img, img_meta, bboxes, labels
def __getitem__(self, idx):
    """Return the image and its annotation data for the given index.

    Args
    ---
        idx: index of the image.

    Returns
    ---
        img: [height, width, channels]
        img_meta: [11]
        bboxes: [num_boxes, 4]
        labels: [num_boxes]

        (Shapes are those stated by the original author; ``img_meta`` is
        whatever ``utils.compose_image_meta`` returns.)

    Raises
    ---
        OSError: if OpenCV cannot read the image file.
    """
    # img_info: dict holding COCO-format image information.
    # ann_info: list of dicts holding COCO-format annotation information.
    img_info = self.img_infos[idx]
    ann_info = self._load_ann_info(idx)

    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly and report the offending path rather than failing
    # later inside cvtColor with an opaque error.
    img_path = osp.join(self.image_dir, img_info['file_name'])
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    # cv2 loads the image as BGR; convert it to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ori_shape = img.shape

    # ann: dict whose keys are bboxes, labels, bboxes_ignore.
    ann = self._parse_ann_info(ann_info)
    bboxes = ann['bboxes']
    labels = ann['labels']

    # Randomly decide whether to flip, with probability ``self.flip_ratio``;
    # bool() keeps a plain Python bool in the meta dict.
    flip = bool(np.random.rand() < self.flip_ratio)

    # Preprocess the image and the bounding boxes.
    img, img_shape, scale_factor = self.img_transform(img, flip)
    bboxes = self.bbox_transform(bboxes, img_shape, scale_factor, flip)

    # Assemble the image meta information.
    img_meta_dict = {
        'ori_shape': ori_shape,  # original image size
        'img_shape': img_shape,  # image size after scaling
        'pad_shape': img.shape,  # image size after scaling + padding
        # scale factor: scaled size = int(original size * scale_factor + 0.5)
        'scale_factor': scale_factor,
        'flip': flip,  # whether a left-right flip was applied
    }
    img_meta = utils.compose_image_meta(img_meta_dict)

    return img, img_meta, bboxes, labels
source_class_ids = self.source_class_ids[self.image_info[image_id]["source"]] active_class_ids[source_class_ids] = 1 # Resize masks to smaller size to reduce memory usage if use_mini_mask: mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) # Image meta data img_meta_dict = dict({ 'ori_shape': original_shape, 'img_shape': , 'pad_shape': image.shape, 'scale_factor': scale, 'flip': flip }) img_meta = utils.compose_image_meta(img_meta_dict) image_meta = compose_image_meta(image_id, original_shape, image.shape, window, scale, active_class_ids) return image, image_meta, class_ids, bbox, mask # def compose_image_meta(image_id, original_image_shape, image_shape, # window, scale, active_class_ids): # """Takes attributes of an image and puts them in one 1D array. # image_id: An int ID of the image. Useful for debugging. # original_image_shape: [H, W, C] before resizing or padding. # image_shape: [H, W, C] after resizing and padding # window: (y1, x1, y2, x2) in pixels. The area of the image where the real # image is (excluding the padding) # scale: The scaling factor applied to the original image (float32)