Example No. 1
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    # boxes is list[np.array]
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)  # create an Instances object holding all objects in one image
    # set the gt_boxes field
    target.gt_boxes = Boxes(boxes)

    classes = [int(obj["category_id"]) for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    # set the category ids
    target.gt_classes = classes
    # handle segmentation annotations below
    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            # TODO check type and provide better error
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
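A minimal usage sketch (not part of the original example), assuming the detectron2 imports used above are available; the annotation dicts here are hypothetical:

sample_annos = [
    {"bbox": [10, 20, 40, 60], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0},
    {"bbox": [5, 5, 95, 95], "bbox_mode": BoxMode.XYXY_ABS, "category_id": 2},
]
target = annotations_to_instances(sample_annos, image_size=(128, 128))
print(len(target), target.gt_boxes, target.gt_classes)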
Example No. 2
def get_empty_instance(h, w):
    inst = Instances((h, w))
    inst.gt_boxes = Boxes(torch.rand(0, 4))
    inst.gt_classes = torch.tensor([], dtype=torch.int64)
    inst.gt_masks = BitMasks(torch.rand(0, h, w))
    return inst
Example No. 3
def get_class_masks_from_instances(
    instances,
    class_id=1,
    add_ignore=True,
    rend_size=REND_SIZE,
    bbox_expansion=BBOX_EXPANSION_FACTOR,
    min_confidence=0.0,
    image_size=IMAGE_SIZE,
):
    """
    Gets occlusion-aware masks for a specific class index and additional metadata from
    PointRend instances.

    Args:
        instances: Detectron2 Instances with segmentation predictions.
        class_id (int): Object class id (using COCO dense ordering).
        add_ignore (bool): If True, adds occlusion-aware masking.
        rend_size (int): Mask size.
        bbox_expansion (float): Amount to pad the masks. This is important to prevent
            ignoring background pixels right outside the bounding box.
        min_confidence (float): Minimum confidence threshold for masks.
        image_size (int): Size used to compute the full-sized masks returned in
            the annotations.

    Returns:
        keep_masks (list[np.ndarray]): occlusion-aware masks, each of shape
            (rend_size, rend_size).
        keep_annotations (list[dict]): per-instance metadata with keys
            "bbox", "class_id", "mask", "score", and "square_bbox".
    """
    if len(instances) == 0:
        return [], []
    instances = instances.to(torch.device("cpu:0"))
    boxes = instances.pred_boxes.tensor.numpy()
    class_ids = instances.pred_classes.numpy()
    scores = instances.scores.numpy()
    keep_ids = np.logical_and(class_ids == class_id, scores > min_confidence)
    bit_masks = BitMasks(instances.pred_masks)

    keep_annotations = []
    keep_masks = []
    full_boxes = torch.tensor([[0, 0, image_size, image_size]] * len(boxes)).float()
    full_sized_masks = bit_masks.crop_and_resize(full_boxes, image_size)
    for k in np.where(keep_ids)[0]:
        bbox = bbox_xy_to_wh(boxes[k])
        square_bbox = make_bbox_square(bbox, bbox_expansion)
        square_boxes = torch.FloatTensor(
            np.tile(bbox_wh_to_xy(square_bbox), (len(instances), 1))
        )
        masks = bit_masks.crop_and_resize(square_boxes, rend_size).clone().detach()
        if add_ignore:
            ignore_mask = masks[0]
            for i in range(1, len(masks)):
                ignore_mask = ignore_mask | masks[i]
            ignore_mask = -ignore_mask.float().numpy()
        else:
            ignore_mask = np.zeros((rend_size, rend_size))
        m = ignore_mask.copy()
        mask = masks[k]
        m[mask] = mask[mask]
        keep_masks.append(m)
        keep_annotations.append(
            {
                "bbox": bbox,
                "class_id": class_ids[k],
                "mask": full_sized_masks[k],
                "score": scores[k],
                "square_bbox": square_bbox,
            }
        )
    return keep_masks, keep_annotations
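Example 3 leans on BitMasks.crop_and_resize, which crops each mask by its matching box and resizes the crop to a square of the requested size. A small sketch on dummy data (not from the source):

import torch
from detectron2.structures import BitMasks

toy_masks = BitMasks(torch.zeros(2, 100, 100, dtype=torch.bool))
toy_boxes = torch.tensor([[10.0, 10.0, 60.0, 60.0], [0.0, 0.0, 100.0, 100.0]])
crops = toy_masks.crop_and_resize(toy_boxes, 28)
print(crops.shape, crops.dtype)  # torch.Size([2, 28, 28]) torch.bool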
Example No. 4
    def process_inst(self, classes, scores, pred_inst, img_shape, ori_shape):
        """
        Simple post-processing to generate predictions for Things.

        Args:
            classes: predicted classes of Things
            scores: predicted scores of Things
            pred_inst: predicted instances of Things
            img_shape: input image shape
            ori_shape: original image shape

        Returns:
            result_instance: preserved results for Things
            pred_mask: preserved binary masks for Things
            classes: preserved object classes
            scores: processed object scores
        """
        pred_inst = pred_inst.sigmoid()[0]
        pred_mask = pred_inst > self.inst_thres
        # object rescore.
        sum_masks = pred_mask.sum((1, 2)).float() + 1e-6
        seg_score = (pred_inst * pred_mask.float()).sum((1, 2)) / sum_masks
        scores *= seg_score
        keep = torch.argsort(scores, descending=True)

        pred_inst = pred_inst[keep]
        pred_mask = pred_mask[keep]
        scores = scores[keep]
        classes = classes[keep]
        sum_masks = sum_masks[keep]

        # object score filter.
        keep = scores >= 0.05
        if keep.sum() == 0:
            result_instance = Instances(ori_shape, pred_masks=[], pred_boxes=[],
                                        pred_classes=[], scores=[])
            return result_instance, pred_mask, None, None
        pred_inst = pred_inst[keep]
        scores = scores[keep]
        classes = classes[keep]

        # sort and keep top_k
        keep = torch.argsort(scores, descending=True)
        keep = keep[:self.center_top_num]
        pred_inst = pred_inst[keep]
        scores = scores[keep].reshape(-1)
        classes = classes[keep].reshape(-1).to(torch.int32)

        pred_inst = F.interpolate(pred_inst.unsqueeze(0),
                                  scale_factor=self.common_stride,
                                  mode="bilinear",
                                  align_corners=False)[..., :img_shape[0], :img_shape[1]]
        pred_inst = F.interpolate(pred_inst,
                                  size=ori_shape,
                                  mode="bilinear",
                                  align_corners=False)[0]

        pred_mask = pred_inst > self.inst_thres
        pred_bitinst = BitMasks(pred_mask)
        result_instance = Instances(ori_shape,
                                    pred_masks=pred_bitinst,
                                    pred_boxes=pred_bitinst.get_bounding_boxes(),
                                    pred_classes=classes,
                                    scores=scores)
        return result_instance, pred_mask, classes, scores
Example No. 5
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:
                   * "image": Tensor, image in (C, H, W) format.
                   * "sem_seg": semantic segmentation ground truth
                   * "center": center points heatmap ground truth
                   * "offset": pixel offsets to center points ground truth
                   * Other information that's included in the original dicts, such as:
                     "height", "width" (int): the output resolution of the model (may be different
                     from input resolution), used in inference.
        Returns:
            list[dict]:
              each dict is the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": see :func:`combine_semantic_and_instance_outputs` for its format.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        size_divisibility = self.backbone.size_divisibility
        images = ImageList.from_tensors(images, size_divisibility)

        features = self.backbone(images.tensor)

        losses = {}
        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, size_divisibility,
                self.sem_seg_head.ignore_value).tensor
            if "sem_seg_weights" in batched_inputs[0]:
                # The default D2 DatasetMapper may not contain "sem_seg_weights"
                # Avoid error in testing when default DatasetMapper is used.
                weights = [
                    x["sem_seg_weights"].to(self.device)
                    for x in batched_inputs
                ]
                weights = ImageList.from_tensors(weights,
                                                 size_divisibility).tensor
            else:
                weights = None
        else:
            targets = None
            weights = None
        sem_seg_results, sem_seg_losses = self.sem_seg_head(
            features, targets, weights)
        losses.update(sem_seg_losses)

        if "center" in batched_inputs[0] and "offset" in batched_inputs[0]:
            center_targets = [
                x["center"].to(self.device) for x in batched_inputs
            ]
            center_targets = ImageList.from_tensors(
                center_targets, size_divisibility).tensor.unsqueeze(1)
            center_weights = [
                x["center_weights"].to(self.device) for x in batched_inputs
            ]
            center_weights = ImageList.from_tensors(center_weights,
                                                    size_divisibility).tensor

            offset_targets = [
                x["offset"].to(self.device) for x in batched_inputs
            ]
            offset_targets = ImageList.from_tensors(offset_targets,
                                                    size_divisibility).tensor
            offset_weights = [
                x["offset_weights"].to(self.device) for x in batched_inputs
            ]
            offset_weights = ImageList.from_tensors(offset_weights,
                                                    size_divisibility).tensor
        else:
            center_targets = None
            center_weights = None

            offset_targets = None
            offset_weights = None

        center_results, offset_results, center_losses, offset_losses = self.ins_embed_head(
            features, center_targets, center_weights, offset_targets,
            offset_weights)
        losses.update(center_losses)
        losses.update(offset_losses)

        if self.training:
            return losses

        processed_results = []
        for sem_seg_result, center_result, offset_result, input_per_image, image_size in zip(
                sem_seg_results, center_results, offset_results,
                batched_inputs, images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(sem_seg_result, image_size, height, width)
            c = sem_seg_postprocess(center_result, image_size, height, width)
            o = sem_seg_postprocess(offset_result, image_size, height, width)
            # Post-processing to get panoptic segmentation.
            panoptic_image, _ = get_panoptic_segmentation(
                r.argmax(dim=0, keepdim=True),
                c,
                o,
                thing_ids=self.meta.thing_dataset_id_to_contiguous_id.values(),
                label_divisor=self.meta.label_divisor,
                stuff_area=self.stuff_area,
                void_label=-1,
                threshold=self.threshold,
                nms_kernel=self.nms_kernel,
                top_k=self.top_k,
            )
            # For semantic segmentation evaluation.
            processed_results.append({"sem_seg": r})
            panoptic_image = panoptic_image.squeeze(0)
            semantic_prob = F.softmax(r, dim=0)
            # For panoptic segmentation evaluation.
            processed_results[-1]["panoptic_seg"] = (panoptic_image, None)
            # For instance segmentation evaluation.
            if self.predict_instances:
                instances = []
                panoptic_image_cpu = panoptic_image.cpu().numpy()
                for panoptic_label in np.unique(panoptic_image_cpu):
                    if panoptic_label == -1:
                        continue
                    pred_class = panoptic_label // self.meta.label_divisor
                    isthing = pred_class in list(
                        self.meta.thing_dataset_id_to_contiguous_id.values())
                    # Get instance segmentation results.
                    if isthing:
                        instance = Instances((height, width))
                        # Evaluation code takes continuous id starting from 0
                        instance.pred_classes = torch.tensor(
                            [pred_class], device=panoptic_image.device)
                        mask = panoptic_image == panoptic_label
                        instance.pred_masks = mask.unsqueeze(0)
                        # Average semantic probability
                        sem_scores = semantic_prob[pred_class, ...]
                        sem_scores = torch.mean(sem_scores[mask])
                        # Center point probability
                        mask_indices = torch.nonzero(mask).float()
                        center_y, center_x = (
                            torch.mean(mask_indices[:, 0]),
                            torch.mean(mask_indices[:, 1]),
                        )
                        center_scores = c[0,
                                          int(center_y.item()),
                                          int(center_x.item())]
                        # Confidence score is semantic prob * center prob.
                        instance.scores = torch.tensor(
                            [sem_scores * center_scores],
                            device=panoptic_image.device)
                        # Get bounding boxes
                        instance.pred_boxes = BitMasks(
                            instance.pred_masks).get_bounding_boxes()
                        instances.append(instance)
                if len(instances) > 0:
                    processed_results[-1]["instances"] = Instances.cat(
                        instances)

        return processed_results
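For reference, the post-processing above decodes panoptic labels of the form category * label_divisor + instance_id. A toy illustration (with an assumed label_divisor of 1000, not taken from the source):

label_divisor = 1000                       # assumed value for illustration
panoptic_label = 17 * label_divisor + 3    # category 17, instance 3
pred_class = panoptic_label // label_divisor
instance_id = panoptic_label % label_divisor
print(pred_class, instance_id)  # 17 3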
Example No. 6
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        visible = [obj["visible_mask"] for obj in annos]
        invisible = []
        for obj in annos:
            if "invisible_mask" in obj:
                invisible.append(obj["invisible_mask"])
            else:
                invisible.append([[0.0,0.0,0.0,0.0,0.0,0.0]])
                
        if mask_format == "polygon":
            # gt amodal masks per image
            a_masks = PolygonMasks(segms)
            # gt visible masks per image
            v_masks = PolygonMasks(visible)
            # gt invisible masks per image
            i_masks = PolygonMasks(invisible)
        else:
            assert mask_format == "bitmask", mask_format
            a_masks = []
            v_masks = []
            i_masks = []
            for segm, vis, inv in zip(segms, visible, invisible):
                if isinstance(segm, list):
                    # polygon
                    a_masks.append(polygons_to_bitmask(segm, *image_size))
                    v_masks.append(polygons_to_bitmask(vis, *image_size))
                    i_masks.append(polygons_to_bitmask(inv, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    a_masks.append(mask_util.decode(segm))
                    v_masks.append(mask_util.decode(vis))
                    i_masks.append(mask_util.decode(inv))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    a_masks.append(segm)
                    v_masks.append(vis)
                    i_masks.append(inv)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            a_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in a_masks])
            )
            v_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in v_masks])
            )
            i_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in i_masks])
            )
            
        # the original mask head is now the amodal mask head
        target.gt_masks = a_masks
        target.gt_v_masks = v_masks
        target.gt_i_masks = i_masks
     
    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
Example No. 7
    def process_inst_onnx(self, classes, scores, pred_inst, img_shape, vis=False):
        pred_inst = pred_inst.sigmoid()[0]
        pred_mask = pred_inst > self.inst_thres
        # object rescore.
        sum_masks = pred_mask.sum((1, 2)).float() + 1e-6
        seg_score = (pred_inst * pred_mask.float()).sum((1, 2)) / sum_masks
        # scores *= seg_score
        scores = scores * seg_score
        # keep = torch.argsort(scores, descending=True)
        dim = 0
        _, keep = torch.sort(scores, descending=True, dim=dim)

        pred_inst = pred_inst[keep]
        # pred_mask = pred_mask[keep]
        scores = scores[keep]
        classes = classes[keep]
        sum_masks = sum_masks[keep]

        if vis:
            ori_shape = [720, 1280]
            # object score filter.
            keep = scores >= 0.05
            if keep.sum() == 0:
                result_instance = Instances(ori_shape, pred_masks=[], pred_boxes=[],
                                            pred_classes=[], scores=[])
                return {'instances': result_instance}
            pred_inst = pred_inst[keep]
            scores = scores[keep]
            classes = classes[keep]

            # sort and keep top_k
            keep = torch.argsort(scores, descending=True)
            keep = keep[:self.center_top_num]
            pred_inst = pred_inst[keep]
            scores = scores[keep].reshape(-1)
            classes = classes[keep].reshape(-1).to(torch.int32)

        pred_inst = F.interpolate(pred_inst.unsqueeze(0),
                                  scale_factor=self.common_stride,
                                  mode="bilinear",
                                  align_corners=False)[..., :img_shape[0], :img_shape[1]]
        if vis:
            pred_inst = F.interpolate(pred_inst,
                                      size=ori_shape,
                                      mode="bilinear",
                                      align_corners=False)[0]
            pred_mask = pred_inst > self.inst_thres
            pred_bitinst = BitMasks(pred_mask)
            result_instance = Instances(ori_shape,
                                        pred_masks=pred_bitinst,
                                        pred_boxes=pred_bitinst.get_bounding_boxes(),
                                        pred_classes=classes,
                                        scores=scores)
            return {"instances": result_instance}
        else:
            # let's visualise the raw instance masks (should be the same output as TensorRT)
            # print('pred_inst shape: ', pred_inst.shape)
            # for i in pred_inst[0]:
            #     import numpy as np
            #     import cv2
            #     print(i.shape)
            #     i = i.cpu().numpy()
            #     print(i)
            #     cv2.imshow('aa', i)
            #     cv2.waitKey(0)
            return pred_inst, classes, scores
Example No. 8
    def __getitem__(self, idx):
        # Retrieve meta data of image
        img_data = self.meta_data[idx]

        # Load image
        path_img = os.path.join(self.root_dir,
                                'leftImg8bit',
                                self.split,
                                img_data['file_name'].split('_')[0],
                                img_data['file_name'].replace('gtFine_', ''))
        image = np.asarray(Image.open(path_img))

        # Get label info
        path_label = os.path.join(self.root_dir,
                                  'gtFine',
                                  'cityscapes_panoptic_'+self.split,
                                  img_data['labelfile_name'])
        panoptic = np.asarray(Image.open(path_label))
        panoptic = rgb2id(panoptic)

        # Get bbox info
        rpn_bbox = []
        class_bbox = []
        for seg in img_data['segments_info']:
            seg_category = self.semantic_class_mapper[seg['category_id']]
            if seg_category['isthing']:
                rpn_bbox.append(seg["bbox"])
                class_bbox.append(self.instance_class_mapper[seg['category_id']])

        # Apply augmentation with albumentations
        if self.transform is not None:
            transformed = self.transform(
                image=image,
                mask=panoptic,
                bboxes=rpn_bbox,
                class_labels=class_bbox
            )
            image = transformed['image']
            panoptic = transformed['mask']
            rpn_bbox = transformed['bboxes']
            class_bbox = transformed['class_labels']

        # Create instance class for detectron (Mask RCNN Head)
        instance = Instances(panoptic.shape)

        # Create semantic segmentation target with augmented data
        semantic = np.zeros_like(panoptic, dtype=np.int64)
        rpn_mask = np.zeros_like(panoptic)
        instance_mask = []
        instance_cls = []

        for seg in img_data['segments_info']:
            seg_category = self.semantic_class_mapper[seg['category_id']]
            semantic[panoptic == seg["id"]] = seg_category['train_id']
            # If segmentation is a thing generate a mask for maskrcnn target
            # Collect information for RPN targets
            if seg_category['isthing']:
                seg_category = self.instance_class_mapper[seg['category_id']]
                mask = np.zeros_like(panoptic)
                mask[panoptic == seg["id"]] = 1 #seg_category['train_id']
                instance_cls.append(seg_category['train_id'])
                instance_mask.append(mask)
                # RPN targets
                rpn_mask[panoptic == seg["id"]] = 1

        # Create same size of bbox and mask instance
        if len(rpn_bbox) > 0:
            rpn_bbox = coco_to_pascal_bbox(np.stack([*rpn_bbox]))

            instance.gt_masks = BitMasks(np.stack(instance_mask))
            instance.gt_classes = torch.as_tensor(instance_cls)
            instance.gt_boxes = Boxes(rpn_bbox)
        else:
            instance.gt_masks = BitMasks(torch.Tensor([]).view(0,1,1))
            instance.gt_classes = torch.as_tensor([])
            instance.gt_boxes = Boxes([])

        return {
            'image': np.array(image),
            'semantic': semantic,
            'instance': instance,
            'image_id': img_data['image_id']
        }
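The dataset above assumes a coco_to_pascal_bbox helper that converts COCO-style [x, y, w, h] boxes into the Pascal-VOC-style [x1, y1, x2, y2] corners expected by Boxes. A plausible sketch of such a helper (an assumption, not the author's code):

import numpy as np

def coco_to_pascal_bbox(boxes: np.ndarray) -> np.ndarray:
    """Convert boxes from [x, y, w, h] to [x1, y1, x2, y2] (hypothetical helper)."""
    out = boxes.astype(np.float64).copy()
    out[:, 2] += out[:, 0]  # x2 = x + w
    out[:, 3] += out[:, 1]  # y2 = y + h
    return out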
Example No. 9
def annotations_to_instances_with_attributes(annos,
                                             image_size,
                                             mask_format="polygon",
                                             load_attributes=False,
                                             max_attr_per_ins=16):
    """
    Extends annotations_to_instances() to also load per-instance attributes.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    if len(annos) and load_attributes:
        attributes = -torch.ones(
            (len(annos), max_attr_per_ins), dtype=torch.int64)
        for idx, anno in enumerate(annos):
            if "attribute_ids" in anno:
                for jdx, attr_id in enumerate(anno["attribute_ids"]):
                    attributes[idx, jdx] = attr_id
        target.gt_attributes = attributes

    return target
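To make the attribute padding concrete: with max_attr_per_ins=4, an instance annotated with attribute_ids [3, 7] produces the row [3, 7, -1, -1]. A small illustration (not from the source):

import torch

max_attr_per_ins = 4
attribute_ids = [3, 7]
row = -torch.ones(max_attr_per_ins, dtype=torch.int64)
row[:len(attribute_ids)] = torch.tensor(attribute_ids, dtype=torch.int64)
print(row)  # tensor([ 3,  7, -1, -1])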
Example No. 10
    def _desc_to_example(desc: Dict):
        # Detectron2 Model Input Format:
        # image: Tensor[C, H, W];
        # height, width: output height and width;
        # instances: Instances Object to training, with the following fields:
        #     "gt_boxes":
        #     "gt_classes":
        #     "gt_masks": a PolygonMasks or BitMasks object storing N masks, one for each instance.
        desc = copy.deepcopy(desc)  # it will be modified by code below
        image_path = os.path.join(images_dir, f'{desc["image_id"]}.jpg')
        # shape: [H, W, C]
        origin_image = detection_utils.read_image(image_path, format="BGR")
        oh, ow, oc = origin_height, origin_width, origin_channels = origin_image.shape

        if augmentations is not None:
            aug_input = T.AugInput(origin_image)
            transforms = augmentations(aug_input)
            auged_image = aug_input.image
        else:
            auged_image = origin_image
        ah, aw, ac = auged_height, auged_width, auged_channels = auged_image.shape

        if not is_train:
            return {
                "image_id":
                desc['image_id'],  # COCOEvaluator.process() needs it.
                # expected shape: [C, H, W]
                "image":
                torch.as_tensor(
                    np.ascontiguousarray(auged_image.transpose(2, 0, 1))),
                "height":
                auged_height,
                "width":
                auged_width,
            }

        target = Instances(image_size=(ah, aw))
        if 'fill gt_boxes':
            # shape: n_box, 4
            boxes_abs = np.array(
                [anno['bbox'] for anno in desc['annotations']])
            if augmentations is not None:
                # clip transformed bbox to image size
                boxes_auged = transforms.apply_box(
                    np.array(boxes_abs)).clip(min=0)
                boxes_auged = np.minimum(
                    boxes_auged,
                    np.array([aw, ah, aw, ah])[np.newaxis, :])
            else:
                boxes_auged = boxes_abs
            target.gt_boxes = Boxes(boxes_auged)
        if 'fill gt_classes':
            classes = [anno['category_id'] for anno in desc['annotations']]
            classes = torch.tensor(classes, dtype=torch.int64)
            target.gt_classes = classes
        if 'fill gt_masks':
            mask_paths = [
                os.path.join(masks_dir, f'{anno["mask_id"]}.png')
                for anno in desc['annotations']
            ]
            masks = np.array(
                list(
                    map(
                        lambda p: cv2.resize(cv2.imread(
                            p, flags=cv2.IMREAD_GRAYSCALE),
                                             dsize=(ow, oh)), mask_paths)))
            if augmentations is not None:
                masks_auged = np.array(
                    list(map(lambda x: transforms.apply_segmentation(x),
                             masks)))
            else:
                masks_auged = masks
            masks_auged = masks_auged > MASK_THRESHOLD
            masks_auged = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x))
                    for x in masks_auged
                ]))
            target.gt_masks = masks_auged

        return {
            "image_id":
            desc['image_id'],  # COCOEvaluator.process() needs it.
            # expected shape: [C, H, W]
            "image":
            torch.as_tensor(
                np.ascontiguousarray(auged_image.transpose(2, 0, 1))),
            "height":
            auged_height,
            "width":
            auged_width,
            "instances":
            target,  # refer: annotations_to_instances()
        }
Example No. 11
    def inference(self, box_cls, box_pred, mask_pred, image_sizes):
        """
        Arguments:
            box_cls (Tensor): tensor of shape (batch_size, num_queries, K).
                The tensor predicts the classification probability for each query.
            box_pred (Tensor): tensor of shape (batch_size, num_queries, 4).
                The tensor predicts 4-vector (cx, cy, w, h) box
                regression values for every query.
            mask_pred (Tensor): predicted mask logits of shape
                (batch_size, num_queries, H, W); only used when mask_on is True.
            image_sizes (List[torch.Size]): the input image sizes

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        assert len(box_cls) == len(image_sizes)
        results = []

        # For each box we assign the best class or the second best if the best one is `no_object`.
        if self.use_focal_loss:
            prob = box_cls.sigmoid()
            # TODO make top-100 as an option for non-focal-loss as well
            scores, topk_indexes = torch.topk(prob.view(box_cls.shape[0], -1),
                                              100,
                                              dim=1)
            topk_boxes = topk_indexes // box_cls.shape[2]
            labels = topk_indexes % box_cls.shape[2]
        else:
            scores, labels = F.softmax(box_cls, dim=-1)[:, :, :-1].max(-1)

        for i, (
                scores_per_image,
                labels_per_image,
                box_pred_per_image,
                image_size,
        ) in enumerate(zip(scores, labels, box_pred, image_sizes)):
            result = Instances(image_size)
            boxes = box_cxcywh_to_xyxy(box_pred_per_image)
            if self.use_focal_loss:
                boxes = torch.gather(boxes.unsqueeze(0), 1,
                                     topk_boxes.unsqueeze(-1).repeat(
                                         1, 1, 4)).squeeze()
            result.pred_boxes = Boxes(boxes)

            result.pred_boxes.scale(scale_x=image_size[1],
                                    scale_y=image_size[0])
            if self.mask_on:
                mask = F.interpolate(
                    mask_pred[i].unsqueeze(0),
                    size=image_size,
                    mode="bilinear",
                    align_corners=False,
                )
                mask = mask[0].sigmoid() > 0.5
                B, N, H, W = mask_pred.shape
                mask = BitMasks(mask.cpu()).crop_and_resize(
                    result.pred_boxes.tensor.cpu(), 32)
                result.pred_masks = mask.unsqueeze(1).to(mask_pred[0].device)

            result.scores = scores_per_image
            result.pred_classes = labels_per_image
            results.append(result)
        return results
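Example 11 assumes a box_cxcywh_to_xyxy helper that converts normalized center-format boxes to corner format before they are scaled by the image size. A typical DETR-style implementation (shown as an assumption, not necessarily the author's exact code):

import torch

def box_cxcywh_to_xyxy(x: torch.Tensor) -> torch.Tensor:
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    x_c, y_c, w, h = x.unbind(-1)
    return torch.stack(
        [x_c - 0.5 * w, y_c - 0.5 * h, x_c + 0.5 * w, y_c + 0.5 * h], dim=-1
    )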
Example No. 12
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes
    if len(annos) and "segmentation" in annos[0]:
        segm = [obj["segmentation"]
                for obj in annos]  # it may be bitmask instead of polygon
        visible_segm = [obj["visible_mask"] for obj in annos
                        ]  # it may be bitmask instead of polygon

        if mask_format == "polygon":
            masks = PolygonMasks(segm)
            if not isinstance(visible_segm[0], list):
                visible_masks = visible_segm
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                # visible_masks = BitMasks.from_polygon_masks(visible_polygons, *image_size)
                visible_masks = PolygonMasks(visible_segm)
        else:
            assert mask_format == "bitmask", mask_format

            if not isinstance(segm[0], list):
                masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in segm]))
                # visible_masks = visible_polygons
                # visible_masks = BitMasks(torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                masks = BitMasks.from_polygon_masks(segm, *image_size)
                # visible_masks = BitMasks.from_polygon_masks(visible_polygons, *image_size)
                # print('masks:{}'.format(polygons))
            if not isinstance(visible_segm[0], list):
                visible_masks = visible_segm
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                # print('visible_masks:{}'.format(visible_polygons))
                visible_masks = BitMasks.from_polygon_masks(
                    visible_segm, *image_size)

        target.gt_masks = masks
        target.gt_visible_masks = visible_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
Example No. 13
    def read_from_ddict(self, ddict, inplace=True):
        """
        test
        """
        """
        Read ground truth annotations from data dicts.
        """ """

        Reads data dicts and stores the information as attributes of the InstanceSet object.
        The descriptions of the attributes are provided in the documentation for self.__init__().

        Parameters
        -----------
        ddict: list
            List of data dicts in format described below in Notes.

        inplace: bool
            If True, the object is modified in-place. Else, the InstanceSet object is returned.

        Returns
        -----------
        self (optional): InstanceSet
            only returned if inplace == False

        Notes
        ------

        Data dicts should have the following format:
            -'file_name': str or Path object
                        path to image corresponding to annotations
            -'mask_format': str
                          'polygonmask' if segmentation masks are lists of XY coordinates, or
                          'bitmask'  if segmentation masks are RLE encoded segmentation masks
            -'height': int
                    image height in pixels
            -'width': int
                    image width in pixels
            -'annotations': list(dict)
                            list of annotations. See the annotation format below.
            -'num_instances': int
                        equal to len(annotations), the number of instances present in the image

        The dictionary format for the annotation dictionaries is as follows:
            -'category_id': int
                            numeric class label for the instance.
            -'bbox_mode': detectron2.structures.BoxMode object
                        describes the format of the bounding box coordinates.
                        The default is BoxMode.XYXY_ABS.
            -'bbox':  list(int)
                    4-element list of bbox coordinates
            -'segmentation': list
                            list containing:
                               - a list of polygon coordinates (mask format is polygonmasks)
                               - dictionaries  of RLE mask encodings (mask format is bitmasks)

        """

        # default values-always set
        self.pred_or_gt = 'gt'  # ddict assumed to be ground truth labels from get_ddict function

        # required values- function will error out if these are not set
        self.filepath = Path(ddict['file_name'])
        self.mask_format = ddict['mask_format']
        image_size = (ddict['height'], ddict['width'])
        # instances_gt = annotations_to_instances(ddict['annotations'], image_size, self.mask_format)

        class_idx = np.asarray(
            [anno['category_id'] for anno in ddict['annotations']], np.int64)
        bbox = np.stack([anno['bbox'] for anno in ddict['annotations']])
        segs = [anno['segmentation'] for anno in ddict['annotations']]
        segtype = type(segs[0])
        if segtype == dict:
            # RLE encoded mask
            masks = RLEMasks(segs)

        elif segtype == np.ndarray:
            if segs[0].dtype == np.bool_:
                #  bitmask
                masks = BitMasks(np.stack(segs))

        else:
            # list of (list or array) of coords in format [x0,y0,x1,y1,...xn,yn]
            masks = PolygonMasks(segs)

        instances = Instances(
            image_size, **{
                'masks': masks,
                'boxes': bbox,
                'class_idx': class_idx
            })
        self.instances = instances
        self.instances.colors = visualize.random_colors(
            len(instances), self.randomstate)

        # optional values- default to None if not in ddict
        self.dataset_class = ddict.get('dataset_class', None)
        HFW = ddict.get('HFW', None)
        HFW_units = None
        if HFW is not None:
            try:
                HFW = float(HFW)
            except ValueError:
                split = HFW.split(' ')
                if len(split) == 2:
                    HFW = float(split[0])
                    HFW_units = split[1]
        self.HFW = HFW
        self.HFW_units = HFW_units

        if not inplace:
            return self
        return
Example No. 14
def annotations_to_instances(annos,
                             image_size,
                             mask_format="polygon",
                             max_num_planes=20):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Args:
        annos (list[dict]): a list of annotations, one per instance.
        image_size (tuple): height, width
    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], BoxMode(obj["bbox_mode"]),
                        BoxMode.XYXY_ABS) for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert (
                        segm.ndim == 2
                    ), "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "plane" in annos[0]:
        plane = [torch.tensor(obj["plane"]) for obj in annos]
        plane_idx = [torch.tensor([i]) for i in range(len(plane))]
        target.gt_planes = torch.stack(plane, dim=0)
        target.gt_plane_idx = torch.stack(plane_idx, dim=0)
    return target