Example #1
def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    scale_inds = np.random.randint(0,
                                   high=len(cfg.TRAIN.SCALES),
                                   size=num_images)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(roidb[i]['image'])

        # AUG BEGIN--------------------------------
        aug_flag = np.random.choice([0, 1], p=[0.5, 0.5])  # apply augmentation with 50% probability
        if aug_flag:
            img_id = roidb[i]['id']
            ann_ids = coco.getAnnIds(imgIds=img_id)
            anns = coco.loadAnns(ann_ids)

            new_ann, im = get_new_data(anns, im, None, background=None)
            if new_ann and new_ann[0]:
                try:
                    new_roidb, ratio_list, ratio_index = combined_roidb_for_training(
                        ('coco_2017_train',), cfg.TRAIN.PROPOSAL_FILES,
                        img_id, new_ann, coco)
                    if new_roidb:
                        if roidb[i]['flipped']:
                            roidb[i] = new_roidb[1]
                        else:
                            roidb[i] = new_roidb[0]
                except IndexError:
                    # Keep the original roidb entry if rebuilding it fails
                    print('IndexError while rebuilding roidb; keeping original entry')
        # AUG END----------------------------------
        # If NOT using opencv to read in images, uncomment following lines
        # if len(im.shape) == 2:
        #     im = im[:, :, np.newaxis]
        #     im = np.concatenate((im, im, im), axis=2)
        # # flip the channel, since the original one using cv2
        # # rgb -> bgr
        # im = im[:, :, ::-1]
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(im, cfg.PIXEL_MEANS,
                                                   [target_size],
                                                   cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale[0])
        processed_ims.append(im[0])

    # Create a blob to hold the input images [n, c, h, w]
    blob = blob_utils.im_list_to_blob(processed_ims)

    return blob, im_scales, roidb
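
All three examples delegate the augmentation itself to an external get_new_data helper whose implementation is not shown here. The stub below is a hypothetical reconstruction of its interface, inferred only from the call sites above (argument order, the always-None third argument, the background=None keyword, and the (annotations, image) return pair); it is not the original implementation.

def get_new_data(anns, im, color_cache=None, background=None):
    """Hypothetical stub matching the call sites in these examples.

    Args:
        anns: list of COCO-style annotation dicts for one image.
        im: HxWxC BGR image array, as returned by cv2.imread.
        color_cache: unused placeholder; every call site passes None.
        background: optional background image to paste instances onto.

    Returns:
        (new_anns, new_im): augmented annotations and image. Example #1
        additionally checks that new_ann[0] is truthy before using it.
    """
    # Placeholder behavior: a real implementation would paste object
    # instances (cropped via their segmentation masks) onto im or onto
    # background, and emit correspondingly updated annotations.
    return anns, im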
Example #2
    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are simply skipped, although in principle they
            # could still be used for training.
            if len(proposals) == 0:
                return None
            if proposals.shape[1] not in (4, 5):
                raise AssertionError(
                    'proposals should have shape (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        # ann = self.get_ann_info(idx)
        # ---------------------- get_ann_info expanded inline below ------------------------
        img_id = self.img_infos[idx]['id']
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        #------------------------AUG BEGIN-------------------------------------------------
        aug_flag = np.random.choice([0, 1], p=[0.5, 0.5])  # apply augmentation with 50% probability
        if aug_flag:
            ann_info, img = get_new_data(ann_info, img, None, background=None)
        #------------------------AUG END---------------------------------------------------
        ann = self._parse_ann_info(ann_info, self.with_mask)
        # ---------------------- end of inlined get_ann_info -------------------------------

        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = np.random.rand() < self.flip_ratio
        # randomly sample a scale
        img_scale = random_scale(self.img_scales, self.multiscale_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            if scores is not None:
                proposals = np.hstack([proposals, scores])
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            img_sz = (img_info['width'], img_info['height'])
            masks = [get_mask(x, img_sz) for x in ann['masks']]
            gt_masks = self.mask_transform(masks, pad_shape, scale_factor,
                                           flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(ori_shape=ori_shape,
                        img_shape=img_shape,
                        pad_shape=pad_shape,
                        scale_factor=scale_factor,
                        flip=flip)

        data = dict(img=DC(to_tensor(img), stack=True),
                    img_meta=DC(img_meta, cpu_only=True),
                    gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data
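
As a usage note: prepare_train_img returns None for unusable samples (empty proposals, no valid gt boxes), so the caller has to re-sample. The wrapper below is a minimal sketch of how an mmdetection-style CustomDataset typically consumes it; the method body is illustrative, not the original code.

    def __getitem__(self, idx):
        # Hypothetical wrapper: keep drawing random indices until
        # prepare_train_img produces a valid training sample.
        while True:
            data = self.prepare_train_img(idx)
            if data is not None:
                return data
            idx = np.random.randint(len(self.img_infos))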
Example #3
    def pull_item(self, index):
        """
        Args:
            index (int): Indexcolor_cache
        Returns:
            tuple: Tuple (image, target, masks, height, width, crowd).
                   target is the object returned by ``coco.loadAnns``.
            Note that if no crowd annotations exist, crowd will be None
        """
        img_id = self.ids[index]

        if self.has_gt:
            ann_ids = self.coco.getAnnIds(imgIds=img_id)

            # Each target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
            target = self.coco.loadAnns(ann_ids)
        else:
            target = []

        # The split below keeps compatibility with both COCO2014 and COCO2017
        # annotations. In 2014, images follow the pattern
        # COCO_{train/val}2014_%012d.jpg, while in 2017 it is just %012d.jpg.
        # Our script downloads the images as %012d.jpg, so convert accordingly.
        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        if file_name.startswith('COCO'):
            file_name = file_name.split('_')[-1]
        path = osp.join(self.root, file_name)
        assert osp.exists(path), 'Image path does not exist: {}'.format(path)
        img = cv2.imread(path)

        if self.is_train:
            target, img = get_new_data(target, img, None, background=None)

        # Separate out crowd annotations. These are annotations that signify a large crowd of
        # objects of said class, where there is no annotation for each individual object. Both
        # during testing and training, consider these crowds as neutral.
        crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])]
        target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]
        num_crowds = len(crowd)

        # This is so we ensure that all crowd annotations are at the end of the array
        target += crowd

        height, width, _ = img.shape

        if len(target) > 0:
            # Pool all the masks for this image into one [num_objects,height,width] matrix
            masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
            masks = np.vstack(masks)
            masks = masks.reshape(-1, height, width)

        if self.target_transform is not None and len(target) > 0:
            target = self.target_transform(target, width, height)

        if self.transform is not None:
            if len(target) > 0:
                target = np.array(target)
                img, masks, boxes, labels = self.transform(
                    img, masks, target[:, :4], {
                        'num_crowds': num_crowds,
                        'labels': target[:, 4]
                    })

                # I stored num_crowds in labels so I didn't have to modify the entirety of augmentations
                num_crowds = labels['num_crowds']
                labels = labels['labels']

                target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
            else:
                img, _, _, _ = self.transform(
                    img, np.zeros((1, height, width), dtype=float),
                    np.array([[0, 0, 1, 1]]), {
                        'num_crowds': 0,
                        'labels': np.array([0])
                    })
                masks = None
                target = None

        # Convert the HWC image array to a CHW tensor before returning
        img_tensor = torch.from_numpy(img).permute(2, 0, 1)
        return img_tensor, target, masks, height, width, num_crowds
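
As with the previous example, pull_item is normally reached through the dataset's __getitem__. The snippet below is a hypothetical wrapper in the style of YOLACT's COCODetection class, shown only to illustrate how the returned six-tuple is typically consumed; it is not part of the original source.

    def __getitem__(self, index):
        # Hypothetical wrapper: forward to pull_item and regroup the
        # annotation-side outputs for the training loop.
        im, gt, masks, h, w, num_crowds = self.pull_item(index)
        return im, (gt, masks, num_crowds)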