def create_instances(predictions, image_size):
    ret = Instances(image_size)

    score = np.asarray([x["score"] for x in predictions])
    chosen = (score > args.conf_threshold).nonzero()[0]
    score = score[chosen]
    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)

    labels = np.asarray(
        [dataset_id_map(predictions[i]["category_id"]) for i in chosen])

    ret.scores = score
    ret.pred_boxes = Boxes(bbox)
    ret.pred_classes = labels

    try:
        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
    except KeyError:
        pass
    return ret
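
A minimal usage sketch (not part of the original snippet): it assumes the create_instances function above is in scope together with detectron2, and it stubs out the module-level args and dataset_id_map names the snippet relies on.

import numpy as np
from types import SimpleNamespace

from detectron2.structures import BoxMode, Boxes, Instances  # also used by create_instances

# hypothetical stand-ins for the snippet's module-level globals
args = SimpleNamespace(conf_threshold=0.5)


def dataset_id_map(contiguous_id):
    # identity mapping, just for this sketch
    return contiguous_id


predictions = [
    {"score": 0.9, "bbox": [10, 20, 30, 40], "category_id": 0},  # XYWH_ABS
    {"score": 0.3, "bbox": [5, 5, 10, 10], "category_id": 1},    # below the threshold
]
instances = create_instances(predictions, image_size=(480, 640))
print(len(instances), instances.pred_boxes)  # 1 kept box, converted to XYXY_ABS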
Example #2
    def boxlist_to_tensor(boxlist, output_box_dim):
        if isinstance(boxlist, np.ndarray):
            box_tensor = torch.from_numpy(boxlist)
        elif isinstance(boxlist, list):
            if len(boxlist) == 0:
                return torch.zeros((0, output_box_dim), dtype=torch.float32)
            else:
                box_tensor = torch.FloatTensor(boxlist)
        else:
            raise Exception("Unrecognized boxlist type")

        input_box_dim = box_tensor.shape[1]
        if input_box_dim != output_box_dim:
            if input_box_dim == 4 and output_box_dim == 5:
                box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS,
                                             BoxMode.XYWHA_ABS)
            else:
                raise Exception(
                    "Unable to convert from {}-dim box to {}-dim box".format(
                        input_box_dim, output_box_dim))
        return box_tensor
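
The 4-to-5-dimensional branch above relies on BoxMode.convert turning an axis-aligned XYWH_ABS box into a rotated XYWHA_ABS box with a zero angle. A small stand-alone illustration of just that conversion (assuming only that detectron2 is installed):

import torch
from detectron2.structures import BoxMode

xywh = torch.tensor([[10.0, 20.0, 30.0, 40.0]])  # x0, y0, w, h
xywha = BoxMode.convert(xywh, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
print(xywha)  # tensor([[25., 40., 30., 40., 0.]]) -> cx, cy, w, h, angle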
Example #3
def gen_crop_transform_with_instance(crop_size,
                                     image_size,
                                     instances,
                                     crop_box=True):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instances (list[dict]): annotation dicts of the instances in the image,
            in Detectron2's dataset format; one of them is picked at random.
        crop_box (bool): if False, iteratively extend the crop region (via
            `adjust_crop`) so that no instance box is cut by the crop.
    """
    instance = np.random.choice(instances)
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                           BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
            ), "The annotation bounding box is outside of the image!"
    assert (image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
            ), "Crop size is larger than image size!"

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)

    # if some instance box would be cut by the crop, extend the crop region
    if not crop_box:
        modified = True
        while modified:
            modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size,
                                                      instances)

    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
Example #4
def instances_to_json(instances, img_id=None):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    has_mask = instances.has("pred_masks_rle")
    if has_mask:
        rles = instances.pred_masks_rle

    has_keypoints = instances.has("pred_keypoints")
    if has_keypoints:
        keypoints = instances.pred_keypoints

    results = []
    for k in range(num_instance):
        result = {
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
        }
        if img_id is not None:
            result["image_id"] = img_id
        if has_mask:
            result["segmentation"] = rles[k]
        if has_keypoints:
            # In COCO annotations,
            # keypoints coordinates are pixel indices.
            # However our predictions are floating point coordinates.
            # Therefore we subtract 0.5 to be consistent with the annotation format.
            # This is the inverse of data loading logic in `datasets/coco.py`.
            keypoints[k][:, :2] -= 0.5
            result["keypoints"] = keypoints[k].flatten().tolist()
        results.append(result)
    return results
Example #5
def prediction_to_dict(instances, img_id):
    """
    Args:
        instances (Instances): the output of the model
        img_id (str): the image id in COCO

    Returns:
        list[dict]: the results in densepose evaluation format
    """
    scores = instances.scores.tolist()
    segmentations = ToMaskConverter.convert(
        instances.pred_densepose, instances.pred_boxes, instances.image_size
    )
    raw_boxes_xywh = BoxMode.convert(
        instances.pred_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    )

    results = []
    for k in range(len(instances)):
        densepose_results_quantized = quantize_densepose_chart_result(
            ToChartResultConverter.convert(instances.pred_densepose[k], instances.pred_boxes[k])
        )
        densepose_results_quantized.labels_uv_uint8 = (
            densepose_results_quantized.labels_uv_uint8.cpu()
        )
        segmentation = segmentations.tensor[k]
        segmentation_encoded = mask_utils.encode(
            np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"])
        )
        segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8")
        result = {
            "image_id": img_id,
            "category_id": 1,  # densepose only has one class
            "bbox": raw_boxes_xywh[k].tolist(),
            "score": scores[k],
            "densepose": densepose_results_quantized,
            "segmentation": segmentation_encoded,
        }
        results.append(result)
    return results
Example #6
    def draw_dataset_dict(self, dic):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:

            boxes = [
                BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
                if x["bbox_mode"] != BoxMode.XYWHA_ABS else x["bbox"]
                for x in annos
            ]

            labels = [x["category_id"] for x in annos]
            colors = None

            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                    "thing_colors"):
                colors = [
                    self._jitter(
                        [x / 255 for x in self.metadata.thing_colors[c]])
                    for c in labels
                ]
            names = self.metadata.get("thing_classes", None)
            if names:
                labels = [names[i] for i in labels]
            labels = [
                "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
                for i, a in zip(labels, annos)
            ]
            self.overlay_instances(labels=labels,
                                   boxes=boxes,
                                   assigned_colors=colors)
        return self.output
Example #7
    def draw_dataset_dict(self, dic):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos]

            labels = [x["category_id"] for x in annos]
            names = self.metadata.get("thing_classes", None)
            if names:
                labels = [names[i] for i in labels]
            labels = [
                "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
                for i, a in zip(labels, annos)
            ]
            self.overlay_instances(labels=labels, boxes=boxes, masks=masks, keypoints=keypts)

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            sem_seg = cv2.imread(dic["sem_seg_file_name"], cv2.IMREAD_GRAYSCALE)
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
        return self.output
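
The method above mirrors detectron2's Visualizer.draw_dataset_dict, so in practice it is reached through that class. A hedged usage sketch (assuming dic is a valid Detectron2 dataset dict whose "file_name" points to an existing image):

import cv2
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer

img = cv2.imread(dic["file_name"])[:, :, ::-1]  # BGR -> RGB
vis = Visualizer(img, metadata=MetadataCatalog.get("coco_2017_val"))
out = vis.draw_dataset_dict(dic)
out.save("vis.jpg")  # VisImage.save writes the rendered visualization to disk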
Example #8
def transform_instance_annotations(
        annotation, transforms, image_size, *, add_meta_infos=False
):
    """
    Apply transforms to box and meta_infos annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_meta_infos` for the meta_infos when `add_meta_infos` is set.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        add_meta_infos (bool): whether to add a transformed "meta_infos" field.

    Returns:
        dict:
            the same input dict with fields "bbox", "meta_infos"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(
        annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    # add meta_infos
    if add_meta_infos:
        meta_infos = dict()
        meta_infos = transforms.apply_meta_infos(meta_infos)
        annotation["meta_infos"] = meta_infos
    return annotation
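
A minimal sketch (not from the original source) of how this variant might be invoked; it applies a plain horizontal flip and keeps add_meta_infos=False, since apply_meta_infos is a project-specific extension rather than a standard fvcore/detectron2 transform method:

import numpy as np
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

anno = {"bbox": [10, 10, 20, 30], "bbox_mode": BoxMode.XYWH_ABS}  # x, y, w, h
flip = T.HFlipTransform(width=100)
out = transform_instance_annotations(anno, [flip], image_size=(80, 100))
print(out["bbox"], out["bbox_mode"])  # flipped box in XYXY_ABS, BoxMode.XYXY_ABS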
Example #9
def transform_proposals(dataset_dict, image_shape, transforms, min_box_side_len, proposal_topk):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        min_box_side_len (int): keep proposals with at least this size
        proposal_topk (int): only keep top-K scoring proposals

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            )
        )
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32")
        )

        boxes.clip(image_shape)
        keep = boxes.nonempty(threshold=min_box_side_len)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
Example #10
def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """
    Apply transforms to box, segmentation and keypoints of annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # Note that bbox is 1d (per-instance bounding box)
    annotation["bbox"] = transforms.apply_box([bbox])[0]
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        polygons = [np.asarray(p).reshape(-1, 2) for p in annotation["segmentation"]]
        annotation["segmentation"] = [p.reshape(-1) for p in transforms.apply_polygons(polygons)]

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
        )
        annotation["keypoints"] = keypoints

    return annotation
Example #11
    def process_annotation(self, ann, mask_side_len=28):
        # Parse annotation data
        img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
        height, width = img_info["height"], img_info["width"]
        gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
        gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)

        # Run rasterize ..
        torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4)
        box_bitmasks = {
            "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0],
            "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len),
            "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize(
                torch_gt_bbox, mask_side_len
            )[0],
        }

        # Run paste ..
        results = defaultdict(dict)
        for k, box_bitmask in box_bitmasks.items():
            padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
            scaled_boxes = scale_boxes(torch_gt_bbox, scale)

            r = results[k]
            r["old"] = paste_mask_in_image_old(
                padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5
            )
            r["aligned"] = paste_masks_in_image(
                box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width)
            )[0]

        table = []
        for rasterize_method, r in results.items():
            for paste_method, mask in r.items():
                mask = np.asarray(mask)
                iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask)
                table.append((rasterize_method, paste_method, iou))
        return table
Example #12
    def __call__(self, instances: Instances) -> DensePoseList:
        """
        Convert DensePose predictions (an instance of `DensePoseOutput`)
        into DensePose annotations data (an instance of `DensePoseList`)
        """
        boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
        boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS,
                                         BoxMode.XYWH_ABS)
        dp_datas = []
        for i, box_xywh in enumerate(boxes_xywh_abs):
            labels_i, result_i = resample_output_to_bbox(
                instances.pred_densepose[i], box_xywh,
                self._confidence_channels())
            annotation_i = self._sample(labels_i.cpu(), result_i.cpu(),
                                        box_xywh)
            annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask(
                instances.pred_densepose[i])

            dp_datas.append(DensePoseDataRelative(annotation_i))
        # create densepose annotations on CPU
        dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size)
        return dp_list
Example #13
    def __getitem__(self, index):
        ann = self.coco[index]

        # bbox transform.
        bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
        bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                               BoxMode.XYXY_ABS)  # x1y1x2y2
        bbox = Boxes(bbox)

        # mask transform.
        mask = PolygonMasks([ann["segmentation"]])
        mask = mask.crop_and_resize(bbox.tensor, self.size).float()
        if self.transform:
            if torch.rand(1) < 0.5:
                mask = mask.flip(2)

        # introduce some random value noise into the mask.
        noise_matrix = VALUE_NOISE * torch.rand(mask.shape)
        mask = torch.where(mask > noise_matrix, mask - noise_matrix,
                           noise_matrix)

        return mask
Example #14
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes" and "gt_attributes"
            (per-instance attribute vectors, filled with -1 when missing).
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    # per-instance attribute vectors; pad with -1 for instances without attributes
    attributes = []
    for obj in annos:
        if "attributes" in obj:
            attributes.append(obj["attributes"])
        else:
            attributes.append([-1] * 16)
    attributes = torch.tensor(attributes, dtype=torch.int64)
    target.gt_attributes = attributes

    return target
Example #15
    def instances_to_json(instances):
        num_instance = len(instances)
        if num_instance == 0:
            return []

        boxes = instances.gt_boxes.tensor.numpy()
        if boxes.shape[1] == 4:
            boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
        boxes = boxes.tolist()
        # scores = instances.scores.tolist()
        classes = instances.gt_classes.tolist()

        results = []
        for k in range(num_instance):
            result = {
                "category_id": classes[k],
                "bbox": boxes[k],
                "bbox_mode": BoxMode.XYWH_ABS,
            }

            results.append(result)
        return results
Example #16
    def _add_densepose_masks_as_segmentation(self, annotations: List[Dict[str, Any]],
                                             image_shape_hw: Tuple[int, int]):
        for obj in annotations:
            if ("densepose" not in obj) or ("segmentation" in obj):
                continue
            # DP segmentation: torch.Tensor [S, S] of float32, S=256
            segm_dp = torch.zeros_like(obj["densepose"].segm)
            segm_dp[obj["densepose"].segm > 0] = 1
            segm_h, segm_w = segm_dp.shape
            bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1),
                                        dtype=torch.float32)
            # image bbox
            x0, y0, x1, y1 = (v.item() for v in BoxMode.convert(
                obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS))
            segm_aligned = (ROIAlign(
                (y1 - y0, x1 - x0), 1.0, 0,
                aligned=True).forward(segm_dp.view(1, 1, *segm_dp.shape),
                                      bbox_segm_dp).squeeze())
            image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32)
            image_mask[y0:y1, x0:x1] = segm_aligned
            # segmentation for BitMasks: bool array of shape [H, W]
            obj["segmentation"] = image_mask >= 0.5
Example #17
def gen_crop_transform_with_instance(crop_size, image_size, instance):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instance (dict): an annotation dict of one instance, in Detectron2's
            dataset format.
    """
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5

    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
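
A hypothetical call (assuming the function above and its imports, numpy, BoxMode and detectron2.data.transforms as T, are in scope): crop a 100x100 region around one annotated instance of a 480x640 image.

from detectron2.structures import BoxMode

instance = {"bbox": [200.0, 150.0, 50.0, 80.0], "bbox_mode": BoxMode.XYWH_ABS}
crop = gen_crop_transform_with_instance((100, 100), (480, 640), instance)
print(crop.x0, crop.y0, crop.w, crop.h)  # a crop region containing the box center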
Example #18
def annotations_to_instances(bboxes, bbox_classes, image_size):
    """
        Create an :class:`Instances` object used by the models, from instance annotations in the dataset dict.

        Args:
            bboxes (ndarray): numpy array of shape (K, 4) where K denotes no. of objects in the image and 4 bounding box coordinate for each object
            bbox_classes (ndarray): numpy array of shape (K,) holding dummy values for class label where K denotes no. of objects in the image
            image_size (tuple): height, width

        Returns:
            Instances: It will contain fields "bboxes", "classes", This is the format that builtin Detectron models expect.
    """
    # boxes are already in XYXY_ABS, so this convert is effectively a no-op
    boxes = [
        BoxMode.convert(obj, BoxMode.XYXY_ABS, BoxMode.XYXY_ABS)
        for obj in bboxes
    ]
    target = Instances(image_size)
    target.bboxes = Boxes(boxes)
    classes = [int(obj) for obj in bbox_classes]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.classes = classes
    return target
Example #19
def create_instances(prediction, image_size):
    ret = Instances(image_size)

    scores = []
    pred_boxes = []
    pred_classes = []
    for instance in prediction["instances"]:
        scores.append(instance["score"])
        pred_boxes.append(instance["bbox"])
        pred_classes.append(instance["category_id"])

    scores = np.asarray(scores)
    pred_boxes = np.asarray(pred_boxes).reshape(-1, 4)
    pred_boxes = BoxMode.convert(pred_boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)

    labels = np.asarray(pred_classes)

    ret.scores = scores
    ret.pred_boxes = Boxes(pred_boxes)
    ret.pred_classes = labels

    return ret
Example #20
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
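
A minimal sketch (assuming detectron2 is installed and the function above is in scope): build ground-truth Instances from two toy annotations in different box modes.

from detectron2.structures import BoxMode

annos = [
    {"bbox": [10, 10, 50, 60], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0},
    {"bbox": [0, 0, 30, 30], "bbox_mode": BoxMode.XYXY_ABS, "category_id": 2},
]
gt = annotations_to_instances(annos, image_size=(120, 160))
print(gt.gt_boxes.tensor)  # all boxes converted to XYXY_ABS and clipped
print(gt.gt_classes)       # tensor([0, 2])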
Example #21
def annotations_to_instances(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes", "gt_isactive" and "gt_actions"
            if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "isactive" in annos[0]:
        isactive = [obj["isactive"] for obj in annos]
        isactive = torch.tensor(isactive, dtype=torch.int64)
        target.gt_isactive = isactive

    if len(annos) and "actions" in annos[0]:
        actions = np.stack([obj.get("actions", []) for obj in annos], axis=0)
        target.gt_actions = Interactions(actions)

    return target
Example #22
    def instances_to_json(self, instances, img_id):
        num_instance = len(instances)
        if num_instance == 0:
            return []

        boxes = instances.pred_boxes.tensor.numpy()
        if boxes.shape[1] == 4:
            boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
        boxes = boxes.tolist()
        scores = instances.scores.tolist()
        classes = instances.pred_classes.tolist()

        results = []
        for k in range(num_instance):
            result = {
                "image_id": img_id,
                "category_id": classes[k],
                "bbox": boxes[k],
                "score": scores[k],
            }

            results.append(result)
        return results
Example #23
def predictor_output_with_fine_and_coarse_segm_to_mask(
        predictor_output: Any, boxes: Boxes,
        image_size_hw: ImageSizeType) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W are the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
         unnormalized scores for N instances; C is the number of fine
         segmentation labels, H and W are the resolution of the estimate

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xyxy_abs = boxes.tensor.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS,
                                     BoxMode.XYWH_ABS)
    N = len(boxes_xywh_abs)
    masks = torch.zeros((N, H, W),
                        dtype=torch.bool,
                        device=boxes.tensor.device)
    for i in range(len(boxes_xywh_abs)):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        labels_i = resample_fine_and_coarse_segm_to_bbox(
            predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        masks[i, y:y + h, x:x + w] = labels_i > 0
    return BitMasks(masks)
Example #24
    def _add_densepose_body_semantics(self, annotations: List[Any],
                                      image_shape_hw: Tuple[int, int]):
        body_semantics = torch.zeros(
            [1, 1, image_shape_hw[0], image_shape_hw[1]], dtype=torch.int)
        for obj in annotations:
            if ("densepose" not in obj) or obj["densepose"] is None:
                continue
            segm_dp = obj["densepose"].segm
            # segm_h, segm_w = segm_dp.shape
            # image bbox
            x0, y0, x1, y1 = (int(round(v)) for v in BoxMode.convert(
                obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS))
            y1 = min(y1, image_shape_hw[0] - 1)
            x1 = min(x1, image_shape_hw[1] - 1)
            segm_dp = F.interpolate(segm_dp.view(1, 1, *segm_dp.shape),
                                    size=(y1 - y0, x1 - x0))
            body_semantics_tmp = torch.zeros_like(body_semantics)
            body_semantics_tmp[:, :, y0:y1, x0:x1] = segm_dp
            body_semantics_tmp *= (body_semantics == 0).int()
            body_semantics += body_semantics_tmp
        return body_semantics.squeeze()
Example #25
    def get_transform(self, image: np.ndarray, annotations: List[Any]) -> Transform:
        """
        This function will modify instances to set the iscrowd flag to 1 for
        annotations not picked. It relies on the dataset mapper to filter those
        items out
        """
        assert isinstance(annotations, (list, tuple)), annotations
        assert all("bbox" in x for x in annotations), annotations
        assert all("bbox_mode" in x for x in annotations), annotations

        image_size = image.shape[:2]

        # filter out iscrowd
        annotations = [x for x in annotations if x.get("iscrowd", 0) == 0]
        if len(annotations) == 0:
            return NoOpTransform()

        sel_index = np.random.randint(len(annotations))
        # set iscrowd flag of other annotations to 1 so that they will be
        #   filtered out by the dataset mapper (https://fburl.com/diffusion/fg64cb4h)
        for idx, instance in enumerate(annotations):
            if idx != sel_index:
                instance["iscrowd"] = 1
        instance = annotations[sel_index]

        bbox_xywh = BoxMode.convert(
            instance["bbox"], instance["bbox_mode"], BoxMode.XYWH_ABS
        )

        scale = np.random.uniform(*self.crop_scale)
        bbox_xywh = bu.scale_bbox_center(bbox_xywh, scale)
        bbox_xywh = bu.clip_box_xywh(bbox_xywh, image_size).int()

        return CropTransform(
            *bbox_xywh.tolist(), orig_h=image_size[0], orig_w=image_size[1]
        )
Example #26
def prediction_to_json(instances, img_id):
    """
    Args:
        instances (Instances): the output of the model
        img_id (str): the image id in COCO

    Returns:
        list[dict]: the results in densepose evaluation format
    """
    scores = instances.scores.tolist()
    segmentations = densepose_to_mask(instances)

    boxes = instances.pred_boxes.tensor.clone()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    instances.pred_densepose = instances.pred_densepose.to_result(boxes)

    results = []
    for k in range(len(instances)):
        densepose = instances.pred_densepose[k]
        segmentation = segmentations.tensor[k]
        segmentation_encoded = mask_utils.encode(
            np.require(segmentation.numpy(),
                       dtype=np.uint8,
                       requirements=["F"]))
        segmentation_encoded["counts"] = segmentation_encoded["counts"].decode(
            "utf-8")
        result = {
            "image_id": img_id,
            "category_id": 1,  # densepose only has one class
            "bbox": densepose[1],
            "score": scores[k],
            "densepose": densepose,
            "segmentation": segmentation_encoded,
        }
        results.append(result)
    return results
Example #27
def aug_gt_instances_to_coco_json(instances, img_id, output_height,
                                  output_width):
    num_instance = len(instances)
    if num_instance == 0:
        return []

    # 1. scale box to output size
    img_size = instances.image_size  # h, w
    scale_x, scale_y = (output_width / img_size[1],
                        output_height / img_size[0])
    results = Instances((output_height, output_width),
                        **instances.get_fields())

    output_boxes = instances.gt_boxes
    output_boxes.scale(scale_x, scale_y)  # xyxy
    output_boxes.clip(results.image_size)

    instances = results[output_boxes.nonempty()]

    # 2. convert to coco
    boxes = instances.gt_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    classes = instances.gt_classes.tolist()

    results = []
    # the nonempty() filter above may have dropped instances; iterate over the filtered set
    for k in range(len(instances)):
        result = {
            "image_id": img_id,
            "category_id": classes[k] + 1,
            "bbox": boxes[k],
            "area": boxes[k][2] * boxes[k][3],
            "iscrowd": 0,
        }
        results.append(result)
    return results
Example #28
def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        tuple: (list of json annotations in COCO format, number of instances,
        per-class counts of predictions for category ids 0-3)
    """
    num_instance = len(instances)
    if num_instance == 0:
        print("no pre")
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()
    results = []
    # per-class prediction counts for category ids 0-3
    class_counts = [0, 0, 0, 0]
    for k in range(num_instance):
        if 0 <= classes[k] <= 3:
            class_counts[classes[k]] += 1
        result = {
            "image_id": img_id,
            "category_id": classes[k] + 1,
            "bbox": boxes[k],
            "score": scores[k],
        }
        results.append(result)
    return results, num_instance, class_counts
Example #29
def transform_instance_annotations(annotation, transforms, image_size):
    """
    Apply transforms to box of annotations of a single instance.

    It will use `transforms.apply_box` for the box.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image

    Returns:
        dict: the same input dict with fields "bbox" transformed according to
              `transforms`. The "bbox_mode" field will be set to XYXY_ABS.
    """
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"],
                           BoxMode.XYXY_ABS)
    # Note that bbox is 1d (per-instance bounding box)
    annotation["bbox"] = transforms.apply_box([bbox])[0]
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    return annotation
Example #30
def _evaluate_box_proposals(dataset_predictions,
                            coco_api,
                            thresholds=None,
                            area="all",
                            limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
        anno = coco_api.loadAnns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno if obj["iscrowd"] == 0
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(
            -1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <=
                                                       area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = (torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else
                   torch.zeros(0, dtype=torch.float32))
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
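
A hypothetical driver for the recall evaluation above; "proposals.pth" and "annotations.json" are placeholder paths for proposals saved by a detectron2 proposal generator (a list of dicts with "image_id" and a "proposals" Instances field) and the matching COCO-format ground truth.

import torch
from pycocotools.coco import COCO

dataset_predictions = torch.load("proposals.pth")
coco_api = COCO("annotations.json")
stats = _evaluate_box_proposals(dataset_predictions, coco_api, area="all", limit=1000)
print("AR@1000:", float(stats["ar"]))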