Example #1
    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """

        image_info = self.image_info[image_id]

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id("taco.{}".format(
                annotation['category_id']))
            if class_id:
                m = utils.annToMask(annotation, image_info["height"],
                                    image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, replace it
                    # with an all-ones mask covering the whole image.
                    if (m.shape[0] != image_info["height"]
                            or m.shape[1] != image_info["width"]):
                        m = np.ones(
                            [image_info["height"], image_info["width"]],
                            dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            # np.bool was removed from NumPy; use the builtin bool instead
            mask = np.stack(instance_masks, axis=2).astype(bool)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(Taco, self).load_mask(image_id)
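All of the examples on this page lean on a project-local `utils.annToMask` helper. For reference, here is a minimal sketch of what such a helper typically does with pycocotools for COCO-style annotations; the projects' own implementations may differ in details:

from pycocotools import mask as maskUtils

def ann_to_mask(ann, height, width):
    """Convert a COCO annotation (polygons or RLE) to a binary mask."""
    segm = ann['segmentation']
    if isinstance(segm, list):
        # Polygon format: merge all polygon parts into a single RLE.
        rles = maskUtils.frPyObjects(segm, height, width)
        rle = maskUtils.merge(rles)
    elif isinstance(segm['counts'], list):
        # Uncompressed RLE: compress it first.
        rle = maskUtils.frPyObjects(segm, height, width)
    else:
        # Already a compressed RLE.
        rle = segm
    return maskUtils.decode(rle)  # uint8 array of shape [height, width]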
Example #2
import os

import numpy as np
import cv2
import imageio
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools.coco import COCO

# `utils` and `config` are project-local modules providing the anchor
# helpers and dataset settings used below.


def data_generator():
    ann_file = '{}/annotations/instances_{}.json'.format(
        config.DATASET_DIR, config.DATASET_TYPE)
    coco = COCO(ann_file)
    categories = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in categories]
    print('COCO categories: \n{}\n'.format(' '.join(nms)))

    img_ids = coco.getImgIds()
    all_anchors = utils.generate_anchors()
    while True:
        rand = np.random.randint(0, len(img_ids))
        # rand = 3118
        # print(rand)

        img_info = coco.loadImgs(img_ids[rand])[0]
        # scipy.ndimage.imread was removed from SciPy; imageio.imread is a
        # drop-in replacement here, and os.path.join keeps the path portable.
        img = imageio.imread(
            os.path.join(config.DATASET_DIR, config.DATASET_TYPE,
                         img_info['file_name']))
        img = img.astype(np.float32) / 255.
        ratio, img, offset = utils.resize_keep_ratio(img, (1024, 1024))

        ann_ids = coco.getAnnIds(imgIds=img_info['id'], iscrowd=0)
        anns = coco.loadAnns(ann_ids)
        bboxs = [ann['bbox'] for ann in anns]
        bboxs = np.vstack(bboxs)
        # Offset category IDs by one to reserve 0 for the background class
        cls = np.array([ann['category_id'] + 1 for ann in anns])
        masks = np.array([
            utils.annToMask(ann, img_info['height'], img_info['width'])
            for ann in anns
        ])

        # resize masks to desired shape
        bboxs_ind = bboxs.astype(int)  # np.int was removed from NumPy
        masks = np.array([
            cv2.resize(
                mask[bboxs_ind[i, 1]:bboxs_ind[i, 1] + bboxs_ind[i, 3],
                     bboxs_ind[i, 0]:bboxs_ind[i, 0] + bboxs_ind[i, 2]],
                (config.MASK_OUTPUT_SHAPE, config.MASK_OUTPUT_SHAPE))
            for i, mask in enumerate(masks)
        ])
        bboxs = bboxs * ratio
        bboxs[:, :2] += offset
        bboxs_rpn = bboxs

        valid_label_range = 0
        # we pad or trim all labels to MAX_GT_TRAIN_INSTANCES so batches have a fixed size
        if bboxs.shape[0] > config.MAX_GT_TRAIN_INSTANCES:
            valid_label_range = config.MAX_GT_TRAIN_INSTANCES
            bboxs = bboxs[:config.MAX_GT_TRAIN_INSTANCES, :]
            cls = cls[:config.MAX_GT_TRAIN_INSTANCES]
            masks = masks[:config.MAX_GT_TRAIN_INSTANCES, :, :]
        else:
            valid_label_range = bboxs.shape[0]
            bboxs = np.pad(
                bboxs,
                ((0, config.MAX_GT_TRAIN_INSTANCES - bboxs.shape[0]), (0, 0)),
                mode='constant',
                constant_values=((0, 0), (0, 0)))
            cls = np.pad(cls,
                         (0, config.MAX_GT_TRAIN_INSTANCES - cls.shape[0]),
                         mode='constant',
                         constant_values=(0, 0))
            masks = np.pad(
                masks, ((0, config.MAX_GT_TRAIN_INSTANCES - masks.shape[0]),
                        (0, 0), (0, 0)),
                mode='constant',
                constant_values=((0, 0), (0, 0), (0, 0)))

        # pre compute rpn targets
        anchor_types, matches = utils.generate_anchor_types(
            all_anchors, bboxs_rpn)
        rpn_positive_mask, rpn_mask = utils.get_mask(anchor_types)
        rpn_labels = utils.generate_rpn_labels(anchor_types, rpn_mask)
        rpn_deltas = utils.generate_rpn_deltas(all_anchors, bboxs_rpn,
                                               rpn_positive_mask, matches)
        rpn_positive_range = rpn_deltas.shape[0]
        # do some padding
        rpn_deltas = np.pad(
            rpn_deltas,
            ((0, config.RPN_ANCHORS_TRAIN_PER_IMAGE - rpn_positive_range),
             (0, 0)), 'constant')
        rpn_positive_mask = np.pad(
            rpn_positive_mask,
            (0, config.RPN_ANCHORS_TRAIN_PER_IMAGE - rpn_positive_range),
            'constant',
            constant_values=-1)

        if config.DEBUG:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            plt.imshow(img)
            # coco.showAnns(anns)
            for bbox in bboxs:
                ax.add_patch(
                    patches.Rectangle(
                        (bbox[0], bbox[1]),
                        bbox[2],
                        bbox[3],
                        edgecolor="red",
                        fill=False  # outline only, no fill
                    ))
            for m in matches:
                ax.add_patch(
                    patches.Rectangle(
                        (all_anchors[m][0], all_anchors[m][1]),
                        all_anchors[m][2],
                        all_anchors[m][3],
                        edgecolor="blue",
                        fill=False  # outline only, no fill
                    ))
            plt.show()
        # we feed precomputed RPN targets from the multi-threaded CPU side
        yield (img, bboxs, rpn_labels, rpn_deltas, rpn_mask,
               rpn_positive_range, rpn_positive_mask, cls, masks,
               valid_label_range)
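A brief, hypothetical consumption sketch for this generator, just to show the shapes coming out of it (the `config` values are the same project-local ones the generator uses):

# Hypothetical usage: pull a few samples and sanity-check the padded shapes.
gen = data_generator()
for _ in range(3):
    (img, bboxs, rpn_labels, rpn_deltas, rpn_mask, rpn_positive_range,
     rpn_positive_mask, cls, masks, valid_label_range) = next(gen)
    # bboxs/cls/masks are padded or trimmed to MAX_GT_TRAIN_INSTANCES;
    # only the first valid_label_range entries are real ground truth.
    assert bboxs.shape[0] == config.MAX_GT_TRAIN_INSTANCES
    print(img.shape, valid_label_range, rpn_positive_range)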
Example #3
    def load_augment_data(self, image_id):
        """Generate augmented data for the image with the given ID.
        """
        info = self.image_info[image_id]
        image = self.load_image(image_id)

        # apply random gamma correction to the image
        gamma = np.random.uniform(0.8, 1)
        gain = np.random.uniform(0.8, 1)
        image = exposure.adjust_gamma(image, gamma, gain)

        # generate random rotation degree
        rotate_degree = np.random.uniform(-5, 5)

        if info["source"] in ["ShapeNetTOI", "Real"]:
            domain_label = 0 ## has coordinate map loss

            mask_path = info["path"] + '_mask.png'
            coord_path = info["path"] + '_coord.png'
            inst_dict = info['inst_dict']
            meta_path = info["path"] + '_meta.txt'

            mask_im = cv2.imread(mask_path)[:, :, 2]
            coord_map = cv2.imread(coord_path)[:, :, :3]
            coord_map = coord_map[:, :, ::-1]

            image, mask_im, coord_map = utils.rotate_and_crop_images(image, 
                                                                     masks=mask_im, 
                                                                     coords=coord_map, 
                                                                     rotate_degree=rotate_degree)
            masks, coords, class_ids, scales = self.process_data(mask_im, coord_map, inst_dict, meta_path)
        elif info["source"]=="coco":
            domain_label = 1 ## no coordinate map loss

            instance_masks = []
            class_ids = []
            annotations = self.image_info[image_id]["annotations"]
            # Build mask of shape [height, width, instance_count] and list
            # of class IDs that correspond to each channel of the mask.
            for annotation in annotations:
                class_id = self.map_source_class_id(
                    "coco.{}".format(annotation['category_id']))
                if class_id:
                    m = utils.annToMask(annotation, info["height"],
                                        info["width"])
                    # Some objects are so small that they're less than 1 pixel area
                    # and end up rounded out. Skip those objects.
                    if m.max() < 1:
                        continue
                    instance_masks.append(m)
                    class_ids.append(class_id)

            # Pack instance masks into an array
            masks = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)

            #print('\nbefore augmented, image shape: {}, masks shape: {}'.format(image.shape, masks.shape))
            image, masks = utils.rotate_and_crop_images(image, 
                                                        masks=masks, 
                                                        coords=None, 
                                                        rotate_degree=rotate_degree)
                        
            #print('\nafter augmented, image shape: {}, masks shape: {}'.format(image.shape, masks.shape))
            
            if len(masks.shape)==2:
                masks = masks[:, :, np.newaxis]
            
            final_masks = []
            final_class_ids = []
            for i in range(masks.shape[-1]):
                m = masks[:, :, i]
                if m.max() < 1:
                    continue
                final_masks.append(m)
                final_class_ids.append(class_ids[i])

            if final_class_ids:
                masks = np.stack(final_masks, axis=2)
                class_ids = np.array(final_class_ids, dtype=np.int32)
            else:
                # Call super class to return an empty mask
                masks = np.empty([0, 0, 0])
                class_ids = np.empty([0], np.int32)


            # use zero arrays as coord map for COCO images
            coords = np.zeros(masks.shape+(3,), dtype=np.float32)
            scales = np.ones((len(class_ids),3), dtype=np.float32)

        else:
            assert False, "unknown source: {}".format(info["source"])


        return image, masks, coords, class_ids, scales, domain_label
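A minimal usage sketch, assuming `dataset` is a prepared instance of the class this method belongs to (the names here are illustrative, not the project's API):

# Hypothetical call; image_id indexes into dataset.image_info.
image, masks, coords, class_ids, scales, domain_label = \
    dataset.load_augment_data(image_id)
# domain_label: 0 -> coordinate-map loss applies (ShapeNetTOI/Real),
#               1 -> no coordinate-map loss (coco)
print(image.shape, masks.shape, coords.shape, class_ids, domain_label)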
Example #4
    def load_mask(self, image_id):
        """Generate instance masks for the objects in the image with the given ID.
        """
        info = self.image_info[image_id]
        #masks, coords, class_ids, scales, domain_label = None, None, None, None, None

        if info["source"] in ["ShapeNetTOI", "Real"]:
            domain_label = 0 ## has coordinate map loss

            mask_path = info["path"] + '_mask.png'
            coord_path = info["path"] + '_coord.png'

            assert os.path.exists(mask_path), "{} is missing".format(mask_path)
            assert os.path.exists(coord_path), "{} is missing".format(coord_path)

            inst_dict = info['inst_dict']
            meta_path = info["path"] + '_meta.txt'

            mask_im = cv2.imread(mask_path)[:, :, 2]
            coord_map = cv2.imread(coord_path)[:, :, :3]
            coord_map = coord_map[:, :, (2, 1, 0)]

            masks, coords, class_ids, scales = self.process_data(mask_im, coord_map, inst_dict, meta_path)


        elif info["source"]=="coco":
            domain_label = 1 ## no coordinate map loss

            instance_masks = []
            class_ids = []
            annotations = self.image_info[image_id]["annotations"]
            # Build mask of shape [height, width, instance_count] and list
            # of class IDs that correspond to each channel of the mask.
            for annotation in annotations:
                class_id = self.map_source_class_id(
                    "coco.{}".format(annotation['category_id']))
                if class_id:
                    m = utils.annToMask(annotation, info["height"],
                                        info["width"])
                    # Some objects are so small that they're less than 1 pixel area
                    # and end up rounded out. Skip those objects.
                    if m.max() < 1:
                        continue
                    instance_masks.append(m)
                    class_ids.append(class_id)

            # Pack instance masks into an array
            if class_ids:
                masks = np.stack(instance_masks, axis=2)
                class_ids = np.array(class_ids, dtype=np.int32)
            else:
                # Call super class to return an empty mask
                masks = np.empty([0, 0, 0])
                class_ids = np.empty([0], np.int32)

            # use zero arrays as coord map for COCO images
            coords = np.zeros(masks.shape+(3,), dtype=np.float32)
            scales = np.ones((len(class_ids),3), dtype=np.float32)
            #print('\nwithout augmented, masks shape: {}'.format(masks.shape))
        else:
            assert False, "unknown source: {}".format(info["source"])

        return masks, coords, class_ids, scales, domain_label
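For the COCO branch above, the empty-annotation fallback and the zero coordinate maps produce the following shapes; a self-contained NumPy check:

import numpy as np

# Shapes produced by the empty-annotation fallback in the coco branch.
masks = np.empty([0, 0, 0])
class_ids = np.empty([0], np.int32)
coords = np.zeros(masks.shape + (3,), dtype=np.float32)
scales = np.ones((len(class_ids), 3), dtype=np.float32)
print(masks.shape, coords.shape, scales.shape)  # (0, 0, 0) (0, 0, 0, 3) (0, 3)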