Example #1
def evaluate_box_proposal(predictions,
                          dic,
                          limit=100,
                          threshold=0.5,
                          aspect_ratio_range=(0, 1 / 3)):
    gt_overlaps = []
    num_pos = 0

    anno = dic["annotations"]
    gt_boxes = [
        BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        for obj in anno
    ]
    gt_aspect_ratios = [
        ratio_of_polygon(obj["segmentation"])
        if not obj["iscrowd"] else ratio_of_bbox(obj["bbox"]) for obj in anno
    ]
    gt_boxes = torch.as_tensor(gt_boxes).reshape(-1,
                                                 4)  # guard against no boxes
    gt_boxes = Boxes(gt_boxes)
    gt_aspect_ratios = torch.as_tensor(gt_aspect_ratios)

    if len(gt_boxes) == 0:
        return None

    predict_boxes = [
        BoxMode.convert(prediction['bbox'], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        for prediction in predictions
    ]
    predict_boxes = torch.as_tensor(predict_boxes).reshape(-1, 4)
    predict_boxes = Boxes(predict_boxes)

    valid_gt_inds = (gt_aspect_ratios >= aspect_ratio_range[0]) & \
                    (gt_aspect_ratios <= aspect_ratio_range[1])
    gt_boxes = gt_boxes[valid_gt_inds]

    if len(gt_boxes) == 0 or len(predictions) == 0:
        return None

    num_pos += len(gt_boxes)
    if limit is not None and len(predictions) > limit:
        predict_boxes = predict_boxes[:limit]

    overlaps = pairwise_iou(predict_boxes, gt_boxes)

    selected_gt = [anno[i] for i, bl in enumerate(valid_gt_inds) if bl]
    selected_pred = []
    _gt_overlaps = torch.zeros(len(gt_boxes))
    pred_classes = []
    for j in range(min(len(predictions), len(gt_boxes))):
        # find which proposal box maximally covers each gt box
        # and get the iou amount of coverage for each gt box
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)

        # find which gt box is 'best' covered (i.e. 'best' = most iou)
        gt_ovr, gt_ind = max_overlaps.max(dim=0)
        assert gt_ovr >= 0
        # find the proposal box that covers the best covered gt box
        box_ind = argmax_overlaps[gt_ind]
        # record the iou coverage of this gt box
        _gt_overlaps[j] = overlaps[box_ind, gt_ind]

        overlapped_box_ind = overlaps[:, gt_ind] > threshold
        if overlapped_box_ind.sum() > 0:
            pred_classes += [
                predictions[i]["category_id"]
                for i, bl in enumerate(overlapped_box_ind) if bl
            ]
            selected_pred += [
                predictions[i] for i, bl in enumerate(overlapped_box_ind) if bl
            ]
        assert _gt_overlaps[j] == gt_ovr
        # mark the proposal box and the gt box as used
        overlaps[box_ind, :] = -1
        overlaps[:, gt_ind] = -1

    # append recorded iou coverage level
    gt_overlaps = _gt_overlaps
    gt_overlaps, _ = torch.sort(gt_overlaps)

    dic["annotations"] = selected_gt
    return selected_pred, dic
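
The heart of this example is the greedy matching loop: at each step the best-covered ground-truth box is paired with the proposal covering it, and both are masked with -1 so they cannot be matched again. A minimal, self-contained sketch of that loop on a toy IoU matrix (plain torch, no detectron2 structures, made-up numbers):

import torch

# Toy IoU matrix: rows are 3 proposals, columns are 2 ground-truth boxes.
overlaps = torch.tensor([[0.10, 0.80],
                         [0.60, 0.30],
                         [0.55, 0.05]])

num_gt = overlaps.shape[1]
gt_overlaps = torch.zeros(num_gt)
for j in range(min(overlaps.shape[0], num_gt)):
    # best proposal for each remaining gt box
    max_overlaps, argmax_overlaps = overlaps.max(dim=0)
    # gt box that is best covered overall, and the proposal that covers it
    gt_ovr, gt_ind = max_overlaps.max(dim=0)
    box_ind = argmax_overlaps[gt_ind]
    gt_overlaps[j] = overlaps[box_ind, gt_ind]
    # mark both as used
    overlaps[box_ind, :] = -1
    overlaps[:, gt_ind] = -1

print(gt_overlaps)  # tensor([0.8000, 0.6000])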
Example #2
    def _convert_xywh_to_xywha(self, x):
        return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
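
In this helper, BoxMode.convert turns an (x0, y0, w, h) box into detectron2's rotated-box format (x_center, y_center, w, h, angle), with angle 0 for an axis-aligned box. A quick illustrative call with made-up values (the expected output assumes the standard BoxMode semantics):

from detectron2.structures import BoxMode

box_xywh = [10.0, 20.0, 100.0, 50.0]  # x0, y0, w, h
box_xywha = BoxMode.convert(box_xywh, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
print(box_xywha)  # expected: [60.0, 45.0, 100.0, 50.0, 0.0]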
Example #3
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array(
            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
        )
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]

        labels = [objs[cat_id] for cat_id in cat_ids]
        for _i in range(len(annos)):
            img_vis = vis_image_mask_bbox_cv2(
                img, masks[_i : _i + 1], bboxes=bboxes_xyxy[_i : _i + 1], labels=labels[_i : _i + 1]
            )
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
            xyz_path = annos[_i]["xyz_path"]
            xyz_info = mmcv.load(xyz_path)
            x1, y1, x2, y2 = xyz_info["xyxy"]
            xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
            xyz = np.zeros((imH, imW, 3), dtype=np.float32)
            xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
            xyz_show = get_emb_show(xyz)
            xyz_crop_show = get_emb_show(xyz_crop)
            img_xyz = img.copy() / 255.0
            mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
            fg_idx = np.where(mask_xyz != 0)
            img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
            img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
            img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
            # diff mask
            diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]

            grid_show(
                [
                    img[:, :, [2, 1, 0]],
                    img_vis[:, :, [2, 1, 0]],
                    img_vis_kpts2d[:, :, [2, 1, 0]],
                    depth,
                    # xyz_show,
                    diff_mask_xyz,
                    xyz_crop_show,
                    img_xyz[:, :, [2, 1, 0]],
                    img_xyz_crop[:, :, [2, 1, 0]],
                    img_vis_crop,
                ],
                [
                    "img",
                    "vis_img",
                    "img_vis_kpts2d",
                    "depth",
                    "diff_mask_xyz",
                    "xyz_crop_show",
                    "img_xyz",
                    "img_xyz_crop",
                    "img_vis_crop",
                ],
                row=3,
                col=3,
            )
Example #4
def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],  # all
        [0 ** 2, 32 ** 2],  # small
        [32 ** 2, 96 ** 2],  # medium
        [96 ** 2, 1e5 ** 2],  # large
        [96 ** 2, 128 ** 2],  # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
        anno = coco_api.loadAnns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno
            if obj["iscrowd"] == 0
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
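
The recall/AR computation at the end of _evaluate_box_proposals stands on its own: given the vector of per-gt best IoUs and the number of positives, recall at threshold t is the fraction of gt boxes covered with IoU >= t, and AR is the mean over the 0.5:0.95 thresholds. A small self-contained sketch with made-up numbers:

import torch

gt_overlaps = torch.tensor([0.30, 0.55, 0.72, 0.91])  # best IoU per matched gt box (toy values)
num_pos = 4                                           # total gt boxes considered

thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05, dtype=torch.float32)
recalls = torch.zeros_like(thresholds)
for i, t in enumerate(thresholds):
    recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
print(recalls.mean().item())  # average recall over IoU 0.5:0.95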
Example #5
    def _convert_xy_to_wh(self, x):
        return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
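
BoxMode.convert also accepts Nx4 numpy arrays or tensors, so the same XYXY -> XYWH conversion can be applied to a whole batch of boxes at once; a short sketch with toy values:

import numpy as np
from detectron2.structures import BoxMode

boxes_xyxy = np.array([[10.0, 20.0, 110.0, 70.0],
                       [ 0.0,  0.0,  50.0, 40.0]])
boxes_xywh = BoxMode.convert(boxes_xyxy, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
print(boxes_xywh)  # expected: [[10., 20., 100., 50.], [0., 0., 50., 40.]]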
Example #6
def combine_association(instance, association):
    pred_masks = [mask.numpy() for mask in instance.pred_masks]
    pred_scores = instance.scores.numpy()
    pred_boxes = instance.pred_boxes.tensor.numpy().tolist()
    pred_classes = instance.pred_classes.numpy()
    h, w = pred_masks[0].shape
    pred_associations = instance.pred_associations.numpy()
    pred_light = association.pred_light.tensor.numpy()
    ret = Instances((h, w))
    ins = Instances((h, w))

    if np.sum(pred_associations) == 0:
        ret.pred_boxes = association.pred_boxes
        ret.scores = association.scores
        ret.pred_classes = association.pred_classes
        ret.pred_light = association.pred_light.tensor.numpy().tolist()
        segm = np.zeros((h, w, 1), order='F', dtype='uint8')
        ret.pred_masks = [segm] * len(association.pred_boxes)
        ret.pred_associations = association.pred_associations.numpy().astype(
            'int').tolist()
        instance.pred_associations = pred_associations.astype('int').tolist()
        return ret, instance

    mask_map = {}
    for i, ass in enumerate(pred_associations):
        if ass != 0:
            if ass in mask_map:
                if pred_classes[i] == 1:
                    mask_map[ass].append((pred_masks[i], pred_scores[i],
                                          pred_classes[i], pred_boxes[i]))
                else:
                    mask_map[ass] = [(pred_masks[i], pred_scores[i],
                                      pred_classes[i], pred_boxes[i]),
                                     mask_map[ass][0]]
            else:

                mask_map[ass] = [(pred_masks[i], pred_scores[i],
                                  pred_classes[i], pred_boxes[i])]

    results = []
    boxes = []
    scores = []
    classes = []
    associations = []
    light = []

    for i, ass in enumerate(association.pred_associations):
        if ass != 0:
            light.append(pred_light[i].tolist())

    for k, v in mask_map.items():
        associations.append(int(k))
        s, o = v
        avg_score = float((s[1] + o[1]) / 2)
        _s = s[0].reshape(h, w, 1)
        _o = o[0].reshape(h, w, 1)

        comb = _s + _o
        classes.append(0)
        segm = encode(np.array(comb, order='F', dtype='uint8'))[0]
        boxes.append(
            BoxMode.convert(eval.maskUtils.toBbox(segm), BoxMode.XYWH_ABS,
                            BoxMode.XYXY_ABS))
        results.append(comb)
        scores.append(avg_score)

    ret.pred_masks = results
    ret.pred_boxes = boxes
    ret.scores = scores
    ret.pred_classes = classes
    ret.pred_associations = associations
    ret.pred_light = light

    instance.pred_associations = instance.pred_associations.numpy().astype(
        'int').tolist()

    return ret, instance
Example #7
def transform_instance_annotations(annotation,
                                   transforms,
                                   image_size,
                                   *,
                                   keypoint_hflip_indices=None):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"],
                           BoxMode.XYXY_ABS)
    # clip transformed bbox to image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm)))

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(annotation["keypoints"],
                                                   transforms, image_size,
                                                   keypoint_hflip_indices)
        annotation["keypoints"] = keypoints

    return annotation
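
A quick way to exercise the variant above is a single horizontal flip. The sketch below assumes the snippet's own module imports are in place (numpy as np, detectron2.data.transforms as T, pycocotools mask_util) and uses made-up box and polygon values:

from detectron2.data import transforms as T
from detectron2.structures import BoxMode

image_size = (480, 640)  # height, width of the transformed image
anno = {
    "bbox": [10.0, 20.0, 100.0, 50.0],  # XYWH box (toy values)
    "bbox_mode": BoxMode.XYWH_ABS,
    "segmentation": [[10.0, 20.0, 110.0, 20.0, 110.0, 70.0, 10.0, 70.0]],
}
flip = T.HFlipTransform(width=image_size[1])
out = transform_instance_annotations(anno, [flip], image_size)
print(out["bbox"], out["bbox_mode"])  # flipped box, now in XYXY_ABS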
Example #8
            pred_by_image[dic["image_id"]], img.shape[:2])
        if not len(predictions) > 0:
            continue
        grouped_gt = vis.group_by(dic["annotations"], ratios, ratios_ranges)

        visualized = False
        for range_name in ratios_ranges.keys():
            if not len(grouped_gt[range_name]) > 0:
                continue
            visualized = True

            vis = Visualizer(img, metadata, scale=scale)
            topk_boxes, topk_indices = vis.topk_iou_boxes(
                predictions.pred_boxes,
                Boxes([
                    BoxMode.convert(x["bbox"], BoxMode.XYWH_ABS,
                                    BoxMode.XYXY_ABS)
                    for x in grouped_gt[range_name]
                ]))
            topk_indices = topk_indices.reshape((-1, ))
            # Transform indices to list since shape 1 tensors will be regarded as scalars.
            vis.draw_dataset_dict({"annotations": grouped_gt[range_name]})
            vis_boxes = vis.draw_instance_predictions(
                predictions[topk_indices.tolist()])

            if args.show:
                webcv2.imshow(basename + "-boxes@" + range_name,
                              vis_boxes.get_image()[..., ::-1])
            else:
                save(vis_boxes.get_image()[..., ::-1], args.output, "boxes",
                     basename + "@%s.jpg" % range_name)
Example #9
def test_vis():
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(
        len(dicts),
        time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [
            cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos
        ]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array([
            BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS)
            for box, box_mode in zip(bboxes, bbox_modes)
        ])
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [
            misc.project_pts(kpt3d, K, R, t)
            for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)
        ]
        # # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]
        # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
        img_vis = vis_image_mask_bbox_cv2(img,
                                          masks,
                                          bboxes=bboxes_xyxy,
                                          labels=labels)
        img_vis_kpts2d = img.copy()
        for anno_i in range(len(annos)):
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d,
                                                       kpts_2d[anno_i])
        grid_show(
            [
                img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]],
                img_vis_kpts2d[:, :, [2, 1, 0]], depth
            ],
            [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
            row=2,
            col=2,
        )
Example #10
def evaluate_for_pix3d(
    predictions,
    dataset,
    metadata,
    filter_iou,
    mesh_models=None,
    iou_thresh=0.5,
    mask_thresh=0.5,
    device=None,
    vis_preds=False,
):
    from PIL import Image

    if device is None:
        device = torch.device("cpu")

    F1_TARGET = "F1@0.300000"

    # classes
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    # initialize tensors to record box & mask AP, number of gt positives
    box_apscores, box_aplabels = {}, {}
    mask_apscores, mask_aplabels = {}, {}
    mesh_apscores, mesh_aplabels = {}, {}
    npos = {}
    for cat_id in cat_ids:
        box_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        box_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mask_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mask_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mesh_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mesh_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        npos[cat_id] = 0.0
    box_covered = []
    mask_covered = []
    mesh_covered = []

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore
        image_file_name = dataset.loadImgs([gt_ann["image_id"]
                                            ])[0]["file_name"]
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue
        npos[gt_label] += 1.0

    for prediction in predictions:

        original_id = prediction["image_id"]
        image_width = dataset.loadImgs([original_id])[0]["width"]
        image_height = dataset.loadImgs([original_id])[0]["height"]
        image_size = [image_height, image_width]
        image_file_name = dataset.loadImgs([original_id])[0]["file_name"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue

        if "instances" not in prediction:
            continue

        num_img_preds = len(prediction["instances"])
        if num_img_preds == 0:
            continue

        # predictions
        scores = prediction["instances"].scores
        boxes = prediction["instances"].pred_boxes.to(device)
        labels = prediction["instances"].pred_classes
        masks_rles = prediction["instances"].pred_masks_rle
        if hasattr(prediction["instances"], "pred_meshes"):
            meshes = prediction["instances"].pred_meshes  # preditected meshes
            verts = [mesh[0] for mesh in meshes]
            faces = [mesh[1] for mesh in meshes]
            meshes = Meshes(verts=verts, faces=faces).to(device)
        else:
            meshes = ico_sphere(4, device)
            meshes = meshes.extend(num_img_preds).to(device)
        if hasattr(prediction["instances"], "pred_dz"):
            pred_dz = prediction["instances"].pred_dz
            heights = boxes.tensor[:, 3] - boxes.tensor[:, 1]
            # NOTE see appendix for derivation of pred dz
            pred_dz = pred_dz[:, 0] * heights.cpu()
        else:
            raise ValueError("Z range of box not predicted")
        assert prediction["instances"].image_size[0] == image_height
        assert prediction["instances"].image_size[1] == image_width

        # ground truth
        # annotations corresponding to original_id (aka coco image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        assert len(
            gt_ann_ids) == 1  # note that pix3d has one annotation per image
        gt_anns = dataset.loadAnns(gt_ann_ids)[0]
        assert gt_anns["image_id"] == original_id

        # get original ground truth mask, box, label & mesh
        maskfile = os.path.join(metadata.image_root, gt_anns["segmentation"])
        with PathManager.open(maskfile, "rb") as f:
            gt_mask = torch.tensor(
                np.asarray(Image.open(f), dtype=np.float32) / 255.0)
        assert gt_mask.shape[0] == image_height and gt_mask.shape[
            1] == image_width

        gt_mask = (gt_mask > 0).to(dtype=torch.uint8)  # binarize mask
        gt_mask_rle = [
            mask_util.encode(np.array(gt_mask[:, :, None], order="F"))[0]
        ]
        gt_box = np.array(gt_anns["bbox"]).reshape(-1, 4)  # xywh from coco
        gt_box = BoxMode.convert(gt_box, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_label = gt_anns["category_id"]
        faux_gt_targets = Boxes(
            torch.tensor(gt_box, dtype=torch.float32, device=device))

        # load gt mesh and extrinsics/intrinsics
        gt_R = torch.tensor(gt_anns["rot_mat"]).to(device)
        gt_t = torch.tensor(gt_anns["trans_mat"]).to(device)
        gt_K = torch.tensor(gt_anns["K"]).to(device)
        if mesh_models is not None:
            modeltype = gt_anns["model"]
            gt_verts, gt_faces = (
                mesh_models[modeltype][0].clone(),
                mesh_models[modeltype][1].clone(),
            )
            gt_verts = gt_verts.to(device)
            gt_faces = gt_faces.to(device)
        else:
            # load from disc
            raise NotImplementedError
        gt_verts = shape_utils.transform_verts(gt_verts, gt_R, gt_t)
        gt_zrange = torch.stack([gt_verts[:, 2].min(), gt_verts[:, 2].max()])
        gt_mesh = Meshes(verts=[gt_verts], faces=[gt_faces])

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        valid_pred_ids = boxiou > filter_iou

        # mask iou
        miou = mask_util.iou(masks_rles, gt_mask_rle, [0])

        # # gt zrange (zrange stores min_z and max_z)
        # # zranges = torch.stack([gt_zrange] * len(meshes), dim=0)

        # predicted zrange (= pred_dz)
        assert hasattr(prediction["instances"], "pred_dz")
        # It's impossible to predict the center location in Z (=tc)
        # from the image. See appendix for more.
        tc = (gt_zrange[1] + gt_zrange[0]) / 2.0
        # Given a center location (tc) and a focal_length,
        # pred_dz = pred_dz * box_h * tc / focal_length
        # See appendix for more.
        zranges = torch.stack(
            [
                torch.stack([
                    tc - tc * pred_dz[i] / 2.0 / gt_K[0],
                    tc + tc * pred_dz[i] / 2.0 / gt_K[0]
                ]) for i in range(len(meshes))
            ],
            dim=0,
        )

        gt_Ks = gt_K.view(1, 3).expand(len(meshes), 3)
        meshes = transform_meshes_to_camera_coord_system(
            meshes, boxes.tensor, zranges, gt_Ks, image_size)

        if vis_preds:
            vis_utils.visualize_predictions(
                original_id,
                image_file_name,
                scores,
                labels,
                boxes.tensor,
                masks_rles,
                meshes,
                metadata,
                "/tmp/output",
            )

        shape_metrics = compare_meshes(meshes, gt_mesh, reduce=False)

        # sort predictions in descending order
        scores_sorted, idx_sorted = torch.sort(scores, descending=True)

        for pred_id in range(num_img_preds):
            # remember we only evaluate the preds that have overlap more than
            # iou_filter with the ground truth prediction
            if valid_pred_ids[idx_sorted[pred_id], 0] == 0:
                continue
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[idx_sorted[pred_id]].item()]
            pred_miou = miou[idx_sorted[pred_id]].item()
            pred_biou = boxiou[idx_sorted[pred_id]].item()
            pred_score = scores[idx_sorted[pred_id]].view(1).to(device)
            # note that metrics returns f1 in % (=x100)
            pred_f1 = shape_metrics[F1_TARGET][
                idx_sorted[pred_id]].item() / 100.0

            # mask
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_miou > iou_thresh)
                    and (original_id not in mask_covered)):
                tpfp[0] = 1
                mask_covered.append(original_id)
            mask_apscores[pred_label].append(pred_score)
            mask_aplabels[pred_label].append(tpfp)

            # box
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_biou > iou_thresh)
                    and (original_id not in box_covered)):
                tpfp[0] = 1
                box_covered.append(original_id)
            box_apscores[pred_label].append(pred_score)
            box_aplabels[pred_label].append(tpfp)

            # mesh
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_f1 > iou_thresh)
                    and (original_id not in mesh_covered)):
                tpfp[0] = 1
                mesh_covered.append(original_id)
            mesh_apscores[pred_label].append(pred_score)
            mesh_aplabels[pred_label].append(tpfp)

    # check things for eval
    # assert npos.sum() == len(dataset.dataset["annotations"])
    # convert to tensors
    pix3d_metrics = {}
    boxap, maskap, meshap = 0.0, 0.0, 0.0
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        cat_box_ap = VOCap.compute_ap(torch.cat(box_apscores[cat_id]),
                                      torch.cat(box_aplabels[cat_id]),
                                      npos[cat_id])
        boxap += cat_box_ap
        pix3d_metrics["box_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_box_ap

        cat_mask_ap = VOCap.compute_ap(torch.cat(mask_apscores[cat_id]),
                                       torch.cat(mask_aplabels[cat_id]),
                                       npos[cat_id])
        maskap += cat_mask_ap
        pix3d_metrics["mask_ap@%.1f - %s" %
                      (iou_thresh, cat_name)] = cat_mask_ap

        cat_mesh_ap = VOCap.compute_ap(torch.cat(mesh_apscores[cat_id]),
                                       torch.cat(mesh_aplabels[cat_id]),
                                       npos[cat_id])
        meshap += cat_mesh_ap
        pix3d_metrics["mesh_ap@%.1f - %s" %
                      (iou_thresh, cat_name)] = cat_mesh_ap

    pix3d_metrics["box_ap@%.1f" % iou_thresh] = boxap / valid
    pix3d_metrics["mask_ap@%.1f" % iou_thresh] = maskap / valid
    pix3d_metrics["mesh_ap@%.1f" % iou_thresh] = meshap / valid

    # print test ground truth
    vis_utils.print_instances_class_histogram(
        [npos[cat_id] for cat_id in cat_ids],  # number of instances
        [dataset.loadCats([cat_id])[0]["name"]
         for cat_id in cat_ids],  # class names
        pix3d_metrics,
    )

    return pix3d_metrics
Example #11
def instances_to_coco_json(instances, img_id, input_format):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    has_exts = instances.has("ext_points")
    if has_exts:
        exts = instances.ext_points.tensor.numpy().tolist()

    has_mask = instances.has("pred_masks")
    has_poly = instances.has("pred_polys")
    if has_mask or has_poly:
        if has_poly:
            output_height = instances.image_size[0]
            output_width = instances.image_size[1]
            rles = get_polygon_rles(instances.pred_polys.flatten(),
                                    (output_height, output_width))
        else:
            if input_format == 'rle':  # input is directly in rle format from polygons
                rles = instances.pred_masks
            else:
                rles = [
                    mask_util.encode(
                        np.array(mask[:, :, None], order="F",
                                 dtype="uint8"))[0]
                    for mask in instances.pred_masks
                ]

        for rle in rles:
            # "counts" is an array encoded by mask_util as a byte-stream. Python3's
            # json writer which always produces strings cannot serialize a bytestream
            # unless you decode it. Thankfully, utf-8 works out (which is also what
            # the pycocotools/_mask.pyx does).
            rle["counts"] = rle["counts"].decode("utf-8")

    has_keypoints = instances.has("pred_keypoints")
    if has_keypoints:
        keypoints = instances.pred_keypoints

    results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
        }
        if has_mask or has_poly:
            result["segmentation"] = rles[k]
        if has_exts:
            result["extreme_points"] = exts[k]
        if has_keypoints:
            # In COCO annotations,
            # keypoints coordinates are pixel indices.
            # However our predictions are floating point coordinates.
            # Therefore we subtract 0.5 to be consistent with the annotation format.
            # This is the inverse of data loading logic in `datasets/coco.py`.
            keypoints[k][:, :2] -= 0.5
            result["keypoints"] = keypoints[k].flatten().tolist()
        results.append(result)
    return results
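
For a quick check, the variant above can be fed a minimal Instances object holding only boxes, scores and classes (toy values); input_format is only consulted when masks are attached, so a placeholder string is fine here:

import torch
from detectron2.structures import Boxes, Instances

inst = Instances((480, 640))  # image height, width
inst.pred_boxes = Boxes(torch.tensor([[10.0, 20.0, 110.0, 220.0]]))
inst.scores = torch.tensor([0.9])
inst.pred_classes = torch.tensor([3])

coco_annos = instances_to_coco_json(inst, img_id=42, input_format="raw")
print(coco_annos)  # [{'image_id': 42, 'category_id': 3, 'bbox': [10.0, 20.0, 100.0, 200.0], 'score': ~0.9}]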
Example #12
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes
    if len(annos) and "segmentation" in annos[0]:
        segm = [obj["segmentation"]
                for obj in annos]  # it may be bitmask instead of polygon
        visible_segm = [obj["visible_mask"] for obj in annos
                        ]  # it may be bitmask instead of polygon

        if mask_format == "polygon":
            masks = PolygonMasks(segm)
            if not isinstance(visible_segm[0], list):
                visible_masks = visible_segm
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                # visible_masks = BitMasks.from_polygon_masks(visible_polygons, *image_size)
                visible_masks = PolygonMasks(visible_segm)
        else:
            assert mask_format == "bitmask", mask_format

            if not isinstance(segm[0], list):
                masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in segm]))
                # visible_masks = visible_polygons
                # visible_masks = BitMasks(torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                masks = BitMasks.from_polygon_masks(segm, *image_size)
                # visible_masks = BitMasks.from_polygon_masks(visible_polygons, *image_size)
                # print('masks:{}'.format(polygons))
            if not isinstance(visible_segm[0], list):
                visible_masks = visible_segm
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                # print('visible_masks:{}'.format(visible_polygons))
                visible_masks = BitMasks.from_polygon_masks(
                    visible_segm, *image_size)

        target.gt_masks = masks
        target.gt_visible_masks = visible_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
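
A minimal smoke test of the variant above (which additionally expects a "visible_mask" field whenever segmentations are present) can skip masks entirely and pass only boxes and classes, with toy values:

from detectron2.structures import BoxMode

annos = [
    {"bbox": [10.0, 20.0, 100.0, 50.0], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0},
    {"bbox": [0.0, 0.0, 30.0, 30.0], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 2},
]
target = annotations_to_instances(annos, image_size=(480, 640))
print(target.gt_boxes)    # Boxes in XYXY_ABS, clipped to the image
print(target.gt_classes)  # tensor([0, 2])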
Example #13
def transform_instance_annotations(annotation,
                                   transforms,
                                   image_size,
                                   *,
                                   keypoint_hflip_indices=None):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"],
                           BoxMode.XYXY_ABS)
    # Note that bbox is 1d (per-instance bounding box)
    annotation["bbox"] = transforms.apply_box([bbox])[0]
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        if isinstance(annotation['segmentation'], dict):
            mask = mask_utils.decode(annotation['segmentation'])
            for transform in transforms.transforms:
                mask = transform.apply_image(mask)
            annotation['segmentation'] = mask
        else:
            # each instance contains 1 or more polygons
            polygons = [
                np.asarray(p).reshape(-1, 2)
                for p in annotation["segmentation"]
            ]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]

    if "visible_mask" in annotation:
        if isinstance(annotation['visible_mask'], dict):
            mask = mask_utils.decode(annotation['visible_mask'])
            for transform in transforms.transforms:
                mask = transform.apply_image(mask)
            annotation['visible_mask'] = mask
        else:
            # each instance contains 1 or more polygons
            polygons = [
                np.asarray(p).reshape(-1, 2)
                for p in annotation["visible_mask"]
            ]
            annotation["visible_mask"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(annotation["keypoints"],
                                                   transforms, image_size,
                                                   keypoint_hflip_indices)
        annotation["keypoints"] = keypoints

    return annotation
Example #14
def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
    num_instances_without_valid_segmentation = 0
    num_instances_without_valid_bounding_box = 0
    dataset_dicts = []
    count_ignore_image_root_warning = 0
    for (img_dict, anno_dict_list) in zip(imgs, anns):
        record = {}
        # NOTE: besides using a relative path in the "file_name" field to represent
        # the image resource, "extended coco" also supports using a URI which
        # represents an image with a single string, e.g. "everstore_handle://xxx",
        if "://" not in img_dict["file_name"]:
            record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        else:
            if image_root is not None:
                count_ignore_image_root_warning += 1
                if count_ignore_image_root_warning == 1:
                    logger.warning(
                        (
                            "Found '://' in file_name: {}, ignore image_root: {}"
                            "(logged once per dataset)."
                        ).format(img_dict["file_name"], image_root)
                    )
            record["file_name"] = img_dict["file_name"]

        if "height" in img_dict or "width" in img_dict:
            record["height"] = img_dict["height"]
            record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same. This fails
            # only when the data parsing logic or the annotation file is buggy.
            assert anno["image_id"] == image_id
            assert anno.get("ignore", 0) == 0

            obj = {
                field: anno[field]
                # NOTE: maybe use MetadataCatalog for this
                for field in ["iscrowd", "bbox", "bbox_mode", "keypoints", "category_id", "extras"]
                if field in anno
            }

            bbox_object = obj.get("bbox", None)
            if bbox_object is not None and "bbox_mode" in obj:
                bbox_object = BoxMode.convert(bbox_object, obj["bbox_mode"], BoxMode.XYWH_ABS)
            if "width" in record and "height" in record and (not valid_bbox(bbox_object, record["width"], record["height"])):
                num_instances_without_valid_bounding_box += 1
                continue

            if obj.get("category_id", None) not in id_map:
                continue

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            if "bbox_mode" not in obj:
                if len(obj["bbox"]) == 5:
                    obj["bbox_mode"] = BoxMode.XYWHA_ABS
                else:
                    obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        if len(objs) == 0:
            continue
        if dataset_name is not None:
            record["dataset_name"] = dataset_name
        dataset_dicts.append(record)

    if count_ignore_image_root_warning > 0:
        logger.warning(
            "The 'ignore image_root: {}' warning occurred {} times".format(
                image_root, count_ignore_image_root_warning
            )
        )

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation
            )
        )

    if num_instances_without_valid_bounding_box > 0:
        logger.warning(
            "Filtered out {} instances without valid bounding boxes. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_bounding_box
            )
        )

    return dataset_dicts
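
The inputs mirror what pycocotools returns: imgs is a list of COCO image dicts and anns a parallel list of per-image annotation lists. A hedged sketch of a call with hand-built data (it still relies on the module-level valid_bbox and logger used above, and the image root is a placeholder path):

from detectron2.structures import BoxMode

imgs = [{"id": 1, "file_name": "0001.jpg", "height": 480, "width": 640}]
anns = [[{
    "image_id": 1,
    "bbox": [10.0, 20.0, 100.0, 50.0],  # XYWH, the COCO default
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 7,
    "iscrowd": 0,
}]]
id_map = {7: 0}  # dataset category id -> contiguous id

dicts = convert_to_dict_list("/path/to/images", id_map, imgs, anns, dataset_name="my_coco_like_set")
print(dicts[0]["file_name"], dicts[0]["annotations"][0]["category_id"])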
Example #15
    def draw_dataset_dict(self, dic, category=None):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.
            category: a list of integer category ids to keep, or None to draw all annotations

        Returns:
            output (VisImage): image object with visualizations.
        """
        # start additional code
        unfiltered_annos = dic.get("annotations", None)
        if category is None:
            annos = unfiltered_annos
        else:
            annos = []
            for annotations in unfiltered_annos:
                if annotations["category_id"] in category:
                    annos.append(annotations)
        # end additional code

        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            boxes = [
                BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
                for x in annos
            ]

            labels = [x["category_id"] for x in annos]
            colors = None
            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                    "thing_colors"):
                colors = [
                    self._jitter(
                        [x / 255 for x in self.metadata.thing_colors[c]])
                    for c in labels
                ]
            names = self.metadata.get("thing_classes", None)
            if names:
                labels = [names[i] for i in labels]
            labels = [
                "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
                for i, a in zip(labels, annos)
            ]
            self.overlay_instances(labels=labels,
                                   boxes=boxes,
                                   masks=masks,
                                   keypoints=keypts,
                                   assigned_colors=colors,
                                   alpha=1.0)

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
                sem_seg = Image.open(f)
                sem_seg = np.asarray(sem_seg, dtype="uint8")
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
        return self.output
Example #16
    def draw_dataset_dict(self, dic):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            boxes = [
                BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
                if len(x["bbox"]) == 4 else x["bbox"] for x in annos
            ]

            colors = None
            category_ids = [x["category_id"] for x in annos]
            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                    "thing_colors"):
                colors = [
                    self._jitter(
                        [x / 255 for x in self.metadata.thing_colors[c]])
                    for c in category_ids
                ]
            names = self.metadata.get("thing_classes", None)
            labels = _create_text_labels(
                category_ids,
                scores=None,
                class_names=[
                    "Hv", "Hp", "CLS", "BL", "PD", "PB", "CC", "LM", "D/P"
                ],
                is_crowd=[x.get("iscrowd", 0) for x in annos],
            )
            boxes = None
            alpha = 0
            self.overlay_instances(labels=labels,
                                   boxes=boxes,
                                   masks=masks,
                                   keypoints=keypts,
                                   assigned_colors=colors,
                                   alpha=alpha)

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
                sem_seg = Image.open(f)
                sem_seg = np.asarray(sem_seg, dtype="uint8")
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)

        pan_seg = dic.get("pan_seg", None)
        if pan_seg is None and "pan_seg_file_name" in dic:
            with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
                pan_seg = Image.open(f)
                pan_seg = np.asarray(pan_seg)
                from panopticapi.utils import rgb2id

                pan_seg = rgb2id(pan_seg)
        if pan_seg is not None:
            segments_info = dic["segments_info"]
            pan_seg = torch.Tensor(pan_seg)
            self.draw_panoptic_seg(pan_seg,
                                   segments_info,
                                   area_threshold=0,
                                   alpha=0.5)
        return self.output
Example #17
def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()
    attr_classes = instances.attr_classes.tolist()
    attr_scores = instances.attr_scores.tolist()

    #print (len(scores), len(attr_scores), len(attr_classes))

    has_mask = instances.has("pred_masks")
    if has_mask:
        # use RLE to encode the masks, because they are too large and takes memory
        # since this evaluator stores outputs of the entire dataset
        rles = [
            mask_util.encode(
                np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_masks
        ]
        for rle in rles:
            # "counts" is an array encoded by mask_util as a byte-stream. Python3's
            # json writer which always produces strings cannot serialize a bytestream
            # unless you decode it. Thankfully, utf-8 works out (which is also what
            # the pycocotools/_mask.pyx does).
            rle["counts"] = rle["counts"].decode("utf-8")

    has_keypoints = instances.has("pred_keypoints")
    if has_keypoints:
        keypoints = instances.pred_keypoints

    results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
            "attributes": attr_classes[k],
            "attr_scores": attr_scores[k],
        }
        if has_mask:
            result["segmentation"] = rles[k]
        if has_keypoints:
            # In COCO annotations,
            # keypoints coordinates are pixel indices.
            # However our predictions are floating point coordinates.
            # Therefore we subtract 0.5 to be consistent with the annotation format.
            # This is the inverse of data loading logic in `datasets/coco.py`.
            keypoints[k][:, :2] -= 0.5
            result["keypoints"] = keypoints[k].flatten().tolist()
        results.append(result)
    return results
Example #18
    def _original_call(self, dataset_dict):
        """
        Modified from detectron2's original __call__ in DatasetMapper
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below

        image = self._read_image(dataset_dict, format=self.img_format)
        if not self.backfill_size:
            utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        assert not self.load_proposals, "Not supported!"

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # Convert dataset_dict["annotations"] to dataset_dict["instances"]
            annotations = [
                obj for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]

            # Convert either rotated box or horizontal box to XYWHA_ABS format
            original_boxes = [
                BoxMode.convert(
                    box=obj["bbox"],
                    from_mode=obj["bbox_mode"],
                    to_mode=BoxMode.XYWHA_ABS,
                ) for obj in annotations
            ]

            transformed_boxes = transforms.apply_rotated_box(
                np.array(original_boxes, dtype=np.float64))

            instances = Instances(image_shape)
            instances.gt_classes = torch.tensor(
                [obj["category_id"] for obj in annotations], dtype=torch.int64)
            instances.gt_boxes = RotatedBoxes(transformed_boxes)
            instances.gt_boxes.clip(image_shape)

            dataset_dict["instances"] = instances[
                instances.gt_boxes.nonempty()]

        return dataset_dict
Example #19
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.image_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        boxes = np.asarray([
            BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                            BoxMode.XYXY_ABS)
            for instance in dataset_dict["annotations"]
        ])
        aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                ) for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of the original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "train2017",
                    "thing_train2017").replace("image/train", "thing_train"))
            else:
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "coco", "lvis").replace("train2017", "thing_train"))
            # change extension to npz
            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
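A mapper with a `__call__` like the one above is usually handed to detectron2's data loader. A minimal usage sketch with detectron2's stock `DatasetMapper`; a custom mapper would be passed the same way, and the registered dataset name is hypothetical:

from detectron2.config import get_cfg
from detectron2.data import DatasetMapper, build_detection_train_loader

cfg = get_cfg()
cfg.DATASETS.TRAIN = ("my_dataset_train",)  # hypothetical, must be registered beforehand

# `mapper` can be any callable that turns a dataset dict into the model's input format.
train_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, is_train=True))
for batch in train_loader:
    print(len(batch), batch[0]["image"].shape)
    break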
Example #20
def convert_output_to_json(outputs, image_filename, metadata):
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    uid = common.createUUID('pred')

    boxes = outputs['instances'].pred_boxes.tensor.cpu().numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = outputs['instances'].scores.tolist()
    category_id = outputs['instances'].pred_classes.tolist()

    classes = []
    for cat in category_id:
        cat_name = reverse_id_mapping[cat]
        classes.append(cat_name)

    num_instances = len(scores)

    print(outputs)

    if num_instances == 0:
        return []

    for k in range(num_instances):
        if k == 0:
            jsonres = {
                image_filename: {
                    "filename":
                    image_filename,
                    "size":
                    0,
                    "regions": [
                        {
                            "region_attributes": {
                                "label": classes[k],
                                "score": scores[k],
                            },
                            "shape_attributes": {
                                "name": "rect",
                                "y": boxes[k][0],
                                "x": boxes[k][1],
                                "height": boxes[k][2],
                                "width": boxes[k][3]
                            }
                        },
                    ],
                    "file_attributes": {
                        "width": 1920,
                        "height": 1280,
                        "uuid": uid
                    }
                }
            }
        else:
            jsonres[image_filename]["regions"].append({
                "region_attributes": {
                    "label": classes[k],
                    "score": scores[k],
                },
                "shape_attributes": {
                    "name": "rect",
                    "y": boxes[k][0],
                    "x": boxes[k][1],
                    "height": boxes[k][2],
                    "width": boxes[k][3]
                }
            })

    return jsonres
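A sketch of how this converter might be driven end to end. `DefaultPredictor` is detectron2's standard inference wrapper; the config, weights, and image path are illustrative, and the snippet's own `common.createUUID` helper must be importable for `convert_output_to_json` to run:

import json

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")

predictor = DefaultPredictor(cfg)
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])  # exposes thing_dataset_id_to_contiguous_id

image = cv2.imread("example.jpg")  # hypothetical input image
outputs = predictor(image)

via_json = convert_output_to_json(outputs, "example.jpg", metadata)
with open("example_pred.json", "w") as f:
    json.dump(via_json, f, indent=2)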
Example #21
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {
            v: k
            for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
        }
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[
            contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [{
        "id": reverse_id_mapper(id),
        "name": name
    } for id, name in enumerate(metadata.thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation)
                else:
                    raise TypeError(
                        f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                                          BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(
                annotation["category_id"])

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description":
        "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
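The returned dict is plain JSON-serializable data, so persisting it is a one-liner; a minimal sketch (the dataset name is hypothetical and must already be registered in DatasetCatalog with "thing_classes" metadata):

import json

coco_dict = convert_to_coco_dict("my_dataset_train")  # hypothetical registered dataset
with open("my_dataset_train_coco_format.json", "w") as f:
    json.dump(coco_dict, f)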
Example #22
    def lincomb_mask_loss(self, gt_classes, mask_coef, proto_mask, gt_instances, gt_matched_idxs):
        """
        Args:
            gt_classes: shapes are (N, R). See :meth:`Yolact.get_ground_truth`.
            mask_coef (list[Tensor]): lvl tensors, each has shape (N, Ax#masks, Hi, Wi).
                See :meth:`YolactHead.forward`.
            proto_mask (Tensor): shapes are (N, #masks, M, M).
            gt_instances (list[Instances]): a list of N `Instances`s.
            gt_matched_idxs (list[Tensor[int64]]): each element is a vector of length R, 
                where gt_matched_idxs[i] is a matched ground-truth index in [0, #objects)
        Returns:
            loss_mask (dict): mask loss scalar.
            maskiou_data (list[inputs, targets, classes]): the input of maskiou_net.
        """
        mask_size = proto_mask.size()[-2:]
        mask_area = mask_size[0] * mask_size[1]
        # shape: (N, M, M, #masks)
        proto_mask = proto_mask.permute(0, 2, 3, 1).contiguous()

        gt_masks = []
        gt_boxes = []
        gt_boxes_area = [] # for normalize weight
        gt_masks_area = [] # for discard_mask_area
        mask_weights = []
        with torch.no_grad():
            for i, instance_per_image in enumerate(gt_instances):
                # (#objects, H, W) binary masks for this image
                gt_mask = instance_per_image.gt_masks.to(device=proto_mask.device).tensor
                # F.interpolate's bilinear mode needs a 4D (N, C, H, W) input, so add a batch dim
                gt_mask = F.interpolate(gt_mask.unsqueeze(0).float(), mask_size,
                                        mode="bilinear", align_corners=False).squeeze(0)
                # gt_mask: shape (M, M, #objects)
                gt_mask = gt_mask.permute(1, 2, 0).contiguous()
                gt_mask = gt_mask.gt(0.5).float()
                gt_masks.append(gt_mask)
                gt_masks_area.append(gt_mask.sum(dim=(0, 1)))
                # mask weights
                gt_foreground_norm = gt_mask / (gt_mask.sum(dim=(0,1), keepdim=True) + 0.0001)
                gt_background_norm = (1-gt_mask) / ((1-gt_mask).sum(dim=(0,1), keepdim=True) + 0.0001)
                mask_weight = (gt_foreground_norm * self.mask_reweight + gt_background_norm) * mask_area
                mask_weights.append(mask_weight)
                # :class:`Boxes` shape (#objects, 4)
                # convert to relative coordinate to crop mask
                gt_box = BoxMode.convert(instance_per_image.gt_boxes, BoxMode.XYXY_ABS, BoxMode.XYXY_REL)
                gt_boxes.append(gt_box.tensor)
                # area(#objects)
                gt_boxes_area.append(gt_box.area())
      
        # reshape/permute predictions so they align with gt_classes
        mask_coef = [permute_to_N_HWA_K(x, self.num_masks) for x in mask_coef]
        # Tensor shape (N, R, #masks)
        mask_coef = cat(mask_coef, dim=1)

        mask_loss = 0
        maskiou_inputs = []
        maskiou_targets = []
        maskiou_classes = []
        # combine mask_coef and proto_mask to generate pred_mask of each image 
        # and calculate loss
        for i in range(len(gt_instances)):
            # gt_class
            gt_class = gt_classes[i]
            # -1: ignore, #num_classes: background
            foreground_idxs = (gt_class >= 0) & (gt_class != self.num_classes)
            pred_coef = mask_coef[i, foreground_idxs]
            # matrix multiply get shape (M, M, #pos)
            pred_mask = F.sigmoid(proto_mask[i] @ pred_coef.t())

            # matched ground truth objects' idx
            gt_matched_idx = gt_matched_idxs[i][foreground_idxs]
            # generate gt_masks
            gt_box = gt_boxes[i][gt_matched_idx]
            gt_mask = gt_masks[i][gt_matched_idx]
            # crop mask using gt_box
            pred_mask = crop(pred_mask, gt_box)
            
            pre_loss = F.binary_cross_entropy(
                torch.clamp(pred_mask, 0, 1), gt_mask, reduction='none')
            # mask_proto_reweight_mask_loss: foreground and background have different weights
            pre_loss = pre_loss * mask_weights[i][:, :, gt_matched_idx]
            # mask_proto_normalize_emulate_roi_pooling: 
            # Normalize the mask loss to emulate roi pooling's effect on the loss.
            pre_loss = pre_loss.sum(dim=(0, 1)) * (mask_area / gt_boxes_area[i])

            mask_loss += pre_loss.sum()

            # cfg.use_maskiou
            select = gt_masks_area[i] > self.discard_mask_area
            if select.sum() > 0:
                pred_mask = pred_mask[:, :, select]
                gt_mask = gt_mask[:, :, select]
                gt_class = gt_class[select]
                # maskiou net input: (N, 1, H, W)
                maskiou_input = pred_mask.permute(2, 0, 1).contiguous().unsqueeze(1)
                pred_mask = pred_mask.gt(0.5).float()
                # maskiou net target: (N)             
                maskiou_target = mask_iou(pred_mask, gt_mask)
                maskiou_inputs.append(maskiou_input)
                maskiou_targets.append(maskiou_target)
                maskiou_classes.append(gt_class)

        losses = {"loss_mask": mask_loss / mask_area * self.mask_alpha}

        if len(maskiou_targets) == 0:
            return losses, None
        else:
            # all images have same size masks
            # so the tensor are shape (N*I, 1, H, W)
            maskiou_targets = torch.cat(maskiou_targets)
            maskiou_classes = torch.cat(maskiou_classes)
            maskiou_inputs = torch.cat(maskiou_inputs)
            return losses, (maskiou_inputs, maskiou_targets, maskiou_classes)
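The core step of the loss above is building each positive instance's mask as a linear combination of prototype masks followed by a sigmoid. A standalone sketch of just that step with made-up tensor sizes (no cropping or reweighting):

import torch

M, num_protos, num_pos = 68, 32, 5            # made-up sizes
proto_mask = torch.randn(M, M, num_protos)    # prototypes for one image, shape (M, M, #masks)
pred_coef = torch.randn(num_pos, num_protos)  # coefficients of the matched positives, (#pos, #masks)

# Each output channel is a weighted sum of the prototypes, squashed into [0, 1].
pred_mask = torch.sigmoid(proto_mask @ pred_coef.t())  # shape (M, M, #pos)
print(pred_mask.shape)  # torch.Size([68, 68, 5])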
Example #23
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            # TODO check type and provide better error
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
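A minimal sketch of calling this helper with hand-written annotations; the values are made up and only the box/class path is exercised:

from detectron2.structures import BoxMode

annos = [
    {"bbox": [10, 10, 50, 80], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0},
    {"bbox": [0, 0, 20, 20], "bbox_mode": BoxMode.XYXY_ABS, "category_id": 2},
]
instances = annotations_to_instances(annos, image_size=(480, 640))
print(instances.gt_boxes.tensor)  # all boxes converted to XYXY_ABS
print(instances.gt_classes)       # tensor([0, 2])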
Example #24
def annotations_to_instances_with_attributes(annos,
                                             image_size,
                                             mask_format="polygon",
                                             load_attributes=False,
                                             max_attr_per_ins=16):
    """
    Extend the function annotations_to_instances() to support attributes
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    if len(annos) and load_attributes:
        attributes = -torch.ones(
            (len(annos), max_attr_per_ins), dtype=torch.int64)
        for idx, anno in enumerate(annos):
            if "attribute_ids" in anno:
                for jdx, attr_id in enumerate(anno["attribute_ids"]):
                    attributes[idx, jdx] = attr_id
        target.gt_attributes = attributes

    return target
Example #25
    def _convert_xywha_to_xyxy(self, x):
        return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
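For reference, a minimal sketch of what this conversion produces for an axis-aligned rotated box (angle 0); for nonzero angles detectron2 returns the horizontal bounding rectangle of the rotated box:

from detectron2.structures import BoxMode

xywha_box = [25.0, 40.0, 30.0, 40.0, 0.0]  # (center_x, center_y, w, h, angle in degrees)
xyxy_box = BoxMode.convert(xywha_box, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
print(xyxy_box)  # expected: [10.0, 20.0, 40.0, 60.0]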
Example #26
def evaluate_box_proposal(predictions,
                          coco_api,
                          thresholds=None,
                          aspect_ratio_range=None,
                          limit=None,
                          oriented=False):
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in tqdm.tqdm(predictions):
        image_id = prediction_dict["image_id"]
        predictions = prediction_dict["instances"]
        predict_boxes = [
            BoxMode.convert(prediction['bbox'], BoxMode.XYWH_ABS,
                            BoxMode.XYXY_ABS) for prediction in predictions
        ]
        predict_boxes = torch.as_tensor(predict_boxes).reshape(-1, 4)
        predict_boxes = Boxes(predict_boxes)

        ann_ids = coco_api.getAnnIds(imgIds=image_id)
        anno = coco_api.loadAnns(ann_ids)
        anno = [obj for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno
        ]
        if oriented:
            gt_aspect_ratios = []
            for obj in anno:
                if obj["iscrowd"]:
                    gt_aspect_ratios.append(obj["bbox"][2] / obj["bbox"][3])
                else:
                    segmentations = PolygonMasks([obj["segmentation"]])
                    ratios = segmentations.get_ratios(oriented=True)
                    gt_aspect_ratios += ratios
        else:
            gt_aspect_ratios = [
                obj["bbox"][2] / obj["bbox"][3]  # w / h ==> aspect ratio
                for obj in anno
            ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(
            -1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_aspect_ratios = torch.as_tensor(gt_aspect_ratios)

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_aspect_ratios >= aspect_ratio_range[0]) & \
                        (gt_aspect_ratios <= aspect_ratio_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        if len(gt_boxes) == 0:
            continue

        num_pos += len(gt_boxes)
        if limit is not None and len(predictions) > limit:
            predict_boxes = predict_boxes[:limit]

        overlaps = pairwise_iou(predict_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = (torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else
                   torch.zeros(0, dtype=torch.float32))
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
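A sketch of driving this AR computation. It assumes `coco_api` is a pycocotools COCO object and that `predictions` is a list with one dict per image whose "instances" entry holds COCO-style prediction dicts with XYWH "bbox" fields; the annotation path and the fabricated prediction are illustrative only:

from pycocotools.coco import COCO

coco_api = COCO("annotations/instances_val.json")  # hypothetical annotation file

# One fabricated prediction per image, just to show the expected structure.
predictions = [
    {"image_id": img_id,
     "instances": [{"bbox": [10, 10, 50, 80], "category_id": 1, "score": 0.9}]}
    for img_id in sorted(coco_api.getImgIds())[:10]
]

stats = evaluate_box_proposal(
    predictions,
    coco_api,
    aspect_ratio_range=(0.0, 1e5),  # keep every ground-truth box
    limit=100,
)
print("AR@100: {:.4f}".format(stats["ar"].item()))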
Example #27
    def draw_dataset_dict(self, dic, assigned_colors=None):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos]

            labels = [x["category_id"] for x in annos]
            colors = assigned_colors
            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
                colors = [
                    self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels
                ]
            names = self.metadata.get("thing_classes", None)
            if names:
                labels = [names[i] for i in labels]
            labels = [
                "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
                for i, a in zip(labels, annos)
            ]
            self.overlay_instances(
                labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors
            )

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
                sem_seg = Image.open(f)
                sem_seg = np.asarray(sem_seg, dtype="uint8")
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)

        pan_seg = dic.get("pan_seg", None)
        if pan_seg is None and "pan_seg_file_name" in dic:
            assert "segments_info" in dic
            with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
                pan_seg = Image.open(f)
                pan_seg = np.asarray(pan_seg)
                from panopticapi.utils import rgb2id

                pan_seg = rgb2id(pan_seg)
            segments_info = dic["segments_info"]
        if pan_seg is not None:
            pan_seg = torch.Tensor(pan_seg)
            self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5)
        return self.output
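This is a (slightly extended) version of detectron2's `Visualizer.draw_dataset_dict`; a minimal sketch of spot-checking one registered dataset dict with it (the dataset name and output path are hypothetical):

import cv2
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import Visualizer

dicts = DatasetCatalog.get("my_dataset_train")  # hypothetical registered dataset
metadata = MetadataCatalog.get("my_dataset_train")

dic = dicts[0]
img = cv2.imread(dic["file_name"])[:, :, ::-1]  # BGR -> RGB for the Visualizer
vis = Visualizer(img, metadata=metadata)
vis_image = vis.draw_dataset_dict(dic)
cv2.imwrite("vis_0.jpg", vis_image.get_image()[:, :, ::-1])  # back to BGR for OpenCV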
Example #28
def _evaluate_predictions_ar(
        predictions,
        coco_api,
        metadata,
        thresholds=None,
        aspect_ratios={},
        areas={},
        limit=None):
    cats = coco_api.cats.values()
    ratios = list(aspect_ratios.values())
    areas = list(areas.values())
    K = len(cats) + 1  # extra slot (index -1) aggregates over all classes
    R = len(ratios)
    A = len(areas)  # Area ranges

    counts_matrixes = []
    overlap_matrixes = []

    gt_overlaps = []

    for prediction_dict in predictions:
        count_matrix = torch.zeros((K, R, A), dtype=torch.int32)

        image_id = prediction_dict["image_id"]
        predictions = prediction_dict["instances"]
        predict_boxes = [
            BoxMode.convert(prediction['bbox'], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for prediction in predictions
        ]
        predict_classes = torch.tensor([
            prediction["category_id"] for prediction in predictions
        ])
        predict_boxes = torch.as_tensor(predict_boxes).reshape(-1, 4)
        predict_boxes = Boxes(predict_boxes)

        ann_ids = coco_api.getAnnIds(imgIds=image_id)
        anno = coco_api.loadAnns(ann_ids)
        anno = [obj for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno
        ]
        gt_classes = torch.tensor([
            metadata.thing_dataset_id_to_contiguous_id[obj["category_id"]]
            for obj in anno])
        gt_aspect_ratios = [
            obj["ratio"] for obj in anno
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_aspect_ratios = torch.as_tensor(gt_aspect_ratios)
        gt_areas = torch.as_tensor(
            [(box[2] - box[0]) * (box[3] - box[1]) for box in gt_boxes])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        if len(gt_boxes) == 0:
            continue

        N = len(gt_boxes)
        overlap_matrix = torch.zeros((K, R, A, N), dtype=torch.float32)
        for i in range(len(gt_boxes)):
            k = gt_classes[i]
            r = between_ranges(gt_aspect_ratios[i], ratios)
            a = torch.tensor(between_ranges(gt_areas[i], areas)).nonzero()
            count_matrix[k, r, a] += 1
            count_matrix[-1, r, a] += 1

        if limit is not None and len(predictions) > limit:
            predict_boxes = predict_boxes[:limit]

        overlaps = pairwise_iou(predict_boxes, gt_boxes)
        class_matched = predict_classes[:, None] == gt_classes[None]
        overlaps_when_matched = overlaps * class_matched

        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)
            max_overlaps_m, argmax_overlaps_m = overlaps_when_matched.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            gt_ovr_m, gt_ind_m = max_overlaps_m.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            box_ind_m = argmax_overlaps_m[gt_ind_m]
            # record the iou coverage of this gt box
            k = gt_classes[gt_ind_m]
            r = between_ranges(gt_aspect_ratios[gt_ind_m], ratios)
            a = torch.tensor(between_ranges(gt_areas[gt_ind_m], areas)).nonzero()
            n = (torch.arange(N) == j).nonzero()
            overlap_matrix[k, r, a, n] = overlaps_when_matched[box_ind_m, gt_ind_m]
            overlap_matrix[-1, r, a, n] = overlaps[box_ind, gt_ind]
            assert torch.all(overlap_matrix[k, r, a, n] == gt_ovr_m)
            assert torch.all(overlap_matrix[-1, r, a, n] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
            overlaps_when_matched[box_ind_m, :] = -1
            overlaps_when_matched[:, gt_ind_m] = -1

        # append recorded iou coverage level
        overlap_matrixes.append(overlap_matrix)
        counts_matrixes.append(count_matrix)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    T = len(thresholds)
    recalls = torch.zeros((T, K, R, A))

    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        count = torch.zeros((K, R, A))
        hit = torch.zeros((K, R, A))
        for count_matrix, overlap_matrix in zip(counts_matrixes, overlap_matrixes):
            hit_matrix = (overlap_matrix >= t).float().sum(-1)
            count += count_matrix
            hit += hit_matrix
        recalls[i] = hit / torch.max(
            count.float(), torch.tensor(1).float())
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls[:, -1, 0, 0].mean()
    mar = recalls[:, :-1, 0, 0].mean()
    return {
        "ar": ar,
        "mar": mar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": torch.stack(counts_matrixes).sum(0),
    }
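The `between_ranges` helper is not shown in this snippet. A plausible minimal sketch that is consistent with how it is used above (one boolean per range, usable both as an index mask and with `.nonzero()`); this is an assumption, not the author's actual implementation:

import torch

def between_ranges(value, ranges):
    """Return one boolean per (low, high) range, True if the range contains `value`."""
    return [low <= value <= high for low, high in ranges]

# Example: an aspect ratio of 0.4 against hypothetical ratio buckets.
ratio_buckets = [(0.0, 1 / 3), (1 / 3, 3.0), (3.0, 1e5)]
print(between_ranges(0.4, ratio_buckets))                          # [False, True, False]
print(torch.tensor(between_ranges(0.4, ratio_buckets)).nonzero())  # tensor([[1]])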
Example #29
    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"0": {}, "1": {}}
            tmp_instances = {"0": {}, "1": {}}
            for i in range(2):
                # TODO this is ugly
                prediction[str(i)]["image_id"] = input[str(i)]["image_id"]
                prediction[str(i)]["file_name"] = input[str(i)]["file_name"]
                if "instances" in output[str(i)]:
                    instances = output[str(i)]["instances"].to(
                        self._cpu_device)
                    prediction[str(i)]["instances"] = instances_to_coco_json(
                        instances, input[str(i)]["image_id"])
                    tmp_instances[str(i)]["embeddingbox"] = {
                        "pred_boxes": instances.pred_boxes,
                        "scores": instances.scores,
                    }
                if "proposals" in output[str(i)]:
                    prediction[str(i)]["proposals"] = output[str(
                        i)]["proposals"].to(self._cpu_device)
                if "annotations" in input[str(i)]:
                    tmp_instances[str(i)]["gt_bbox"] = [
                        ann["bbox"] for ann in input[str(i)]["annotations"]
                    ]
                    if len(input[str(i)]["annotations"]) > 0:
                        tmp_instances[str(i)]["gt_bbox"] = np.array(
                            tmp_instances[str(i)]["gt_bbox"]).reshape(
                                -1, 4)  # xywh from coco
                        original_mode = input[str(
                            i)]["annotations"][0]["bbox_mode"]
                        tmp_instances[str(i)]["gt_bbox"] = BoxMode.convert(
                            tmp_instances[str(i)]["gt_bbox"],
                            BoxMode(original_mode),
                            BoxMode.XYXY_ABS,
                        )
                        if hasattr(output[str(i)]["instances"], "pred_plane"):
                            prediction[str(i)]["pred_plane"] = output[str(
                                i)]["instances"].pred_plane.to(
                                    self._cpu_device)
                if output["depth"][str(i)] is not None:
                    prediction[str(i)]["pred_depth"] = output["depth"][str(
                        i)].to(self._cpu_device)
                    xyz = self.depth2XYZ(output["depth"][str(i)])
                    prediction[str(i)] = self.override_offset(
                        xyz, prediction[str(i)], output[str(i)])
                    depth_rst = get_depth_err(
                        output["depth"][str(i)],
                        input[str(i)]["depth"].to(self._device))
                    prediction[str(i)]["depth_l1_dist"] = depth_rst.to(
                        self._cpu_device)

            if "pred_aff" in output:
                tmp_instances["pred_aff"] = output["pred_aff"].to(
                    self._cpu_device)
            if "geo_aff" in output:
                tmp_instances["geo_aff"] = output["geo_aff"].to(
                    self._cpu_device)
            if "emb_aff" in output:
                tmp_instances["emb_aff"] = output["emb_aff"].to(
                    self._cpu_device)
            if "gt_corrs" in input:
                tmp_instances["gt_corrs"] = input["gt_corrs"]
            prediction["corrs"] = tmp_instances
            if "embedding" in self._plane_tasks:
                if self._eval_gt_box:
                    aff_rst = get_affinity_label_score(
                        tmp_instances,
                        filter_iou=self._filter_iou,
                        filter_score=self._filter_score,
                        device=self._device,
                    )
                else:
                    aff_rst = get_affinity_label_score(
                        tmp_instances,
                        hungarian_threshold=[],
                        filter_iou=self._filter_iou,
                        filter_score=self._filter_score,
                        device=self._device,
                    )
                prediction.update(aff_rst)
            if "camera" in self._plane_tasks:
                camera_dict = {
                    "logits": {
                        "tran": output["camera"]["tran"].to(self._cpu_device),
                        "rot": output["camera"]["rot"].to(self._cpu_device),
                    },
                    "gts": {
                        "tran": input["rel_pose"]["position"],
                        "rot": input["rel_pose"]["rotation"],
                        "tran_cls": input["rel_pose"]["tran_cls"],
                        "rot_cls": input["rel_pose"]["rot_cls"],
                    },
                }
                prediction["camera"] = camera_dict
            self._predictions.append(prediction)
Example #30
def amodal_instances_to_coco_json(instances, img_id, type="amodal"):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    num_instance = len(instances)
    if num_instance == 0:
        return [], []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    # use RLE to encode the masks, because they are too large and take memory
    # since this evaluator stores outputs of the entire dataset
    if type == "amodal":
        amodal_rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_amodal_masks
        ]


        visible_rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_visible_masks
        ]

        area = [
            (torch.sum(amodal_mask * visible_mask).float() / torch.sum(amodal_mask).float()).item()
            for amodal_mask, visible_mask in zip(instances.pred_amodal_masks, instances.pred_visible_masks)
        ]
    elif type == "amodal2":
        amodal_rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_amodal2_masks
        ]

        visible_rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_visible2_masks
        ]

        area = [
            (torch.sum(amodal_mask * visible_mask).float() / torch.sum(amodal_mask).float()).item()
            for amodal_mask, visible_mask in zip(instances.pred_amodal2_masks, instances.pred_visible2_masks)]
    elif type == "amodal_ensemble":
        amodal_rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_amodal_ensemble_masks
        ]

        visible_rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_visible_ensemble_masks
        ]

        area = [
            (torch.sum(amodal_mask * visible_mask).float() / torch.sum(amodal_mask).float()).item()
            for amodal_mask, visible_mask in zip(instances.pred_amodal_ensemble_masks, instances.pred_visible_ensemble_masks)
        ]
    # if type == "amodal":
    #     rles = [
    #         mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
    #         for mask in instances.pred_amodal_masks
    #     ]
    # elif type == "visible":
    #     rles = [
    #         mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
    #         for mask in instances.pred_visible_masks
    #     ]
    # elif type == "amodal2":
    #     rles = [
    #         mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
    #         for mask in instances.pred_amodal2_masks
    #     ]
    # elif type == "visible2":
    #     rles = [
    #         mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
    #         for mask in instances.pred_visible2_masks
    #     ]
    # elif type == "amodal_ensemble":
    #     rles = [
    #         mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
    #         for mask in instances.pred_amodal_ensemble_masks
    #     ]
    # elif type == "visible_ensemble":
    #     rles = [
    #         mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
    #         for mask in instances.pred_visible_ensemble_masks
    #     ]
    else:
        raise ValueError("type == {} is not available".format(type))

    for amodal_rle, visible_rle in zip(amodal_rles, visible_rles):
        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
        # json writer which always produces strings cannot serialize a bytestream
        # unless you decode it. Thankfully, utf-8 works out (which is also what
        # the pycocotools/_mask.pyx does).
        amodal_rle["counts"] = amodal_rle["counts"].decode("utf-8")
        visible_rle["counts"] = visible_rle["counts"].decode("utf-8")


    amodal_results = []
    visible_results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
            "segmentation": amodal_rles[k],
            "area": area[k]
        }
        amodal_results.append(result)
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
            "segmentation": visible_rles[k],
            "area": area[k]
        }
        visible_results.append(result)
    return amodal_results, visible_results
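Because the "counts" fields are decoded to UTF-8 strings before being returned, the results can be dumped straight to JSON and re-encoded later for pycocotools. A small round-trip sketch with a made-up mask:

import json

import numpy as np
import pycocotools.mask as mask_util

# Encode a made-up binary mask the same way as above.
mask = np.zeros((4, 6), dtype="uint8")
mask[1:3, 2:5] = 1
rle = mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
rle["counts"] = rle["counts"].decode("utf-8")  # now JSON-serializable

# Round trip: re-encode "counts" to bytes before handing the RLE back to pycocotools.
loaded = json.loads(json.dumps(rle))
loaded["counts"] = loaded["counts"].encode("utf-8")
print(mask_util.area(loaded))                          # 6 foreground pixels
print(np.array_equal(mask_util.decode(loaded), mask))  # True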