Example #1
    def test_fast_rcnn_rotated(self):
        torch.manual_seed(132)
        box_head_output_size = 8

        box_predictor = RotatedFastRCNNOutputLayers(
            ShapeSpec(channels=box_head_output_size),
            box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5,
                                                               1)),
            num_classes=5,
        )
        feature_pooled = torch.rand(2, box_head_output_size)
        predictions = box_predictor(feature_pooled)
        proposal_boxes = torch.tensor(
            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]],
            dtype=torch.float32)
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                                dtype=torch.float32)
        proposal = Instances((10, 10))
        proposal.proposal_boxes = RotatedBoxes(proposal_boxes)
        proposal.gt_boxes = RotatedBoxes(gt_boxes)
        proposal.gt_classes = torch.tensor([1, 2])

        with EventStorage():  # capture events in a new storage to discard them
            losses = box_predictor.losses(predictions, [proposal])

        # Note: the expected losses are slightly different even if
        # the boxes are essentially the same as in the FastRCNNOutput test, because
        # bbox_pred in FastRCNNOutputLayers has different Linear layers/initialization
        # between the two cases.
        expected_losses = {
            "loss_cls": torch.tensor(1.7920907736),
            "loss_box_reg": torch.tensor(4.0410838127),
        }
        for name in expected_losses.keys():
            assert torch.allclose(losses[name], expected_losses[name])
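
The test above relies on Box2BoxTransformRotated to encode the offset between each proposal and its ground-truth box. Below is a minimal, hedged sketch of that encode/decode round trip (the imports are the standard detectron2 ones; the box values are made up for illustration):

# Hedged sketch: Box2BoxTransformRotated round trip on one (cx, cy, w, h, angle) box pair.
import torch
from detectron2.modeling.box_regression import Box2BoxTransformRotated

transform = Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1))
src = torch.tensor([[2.0, 1.95, 2.4, 1.7, 0.0]])   # proposal box
dst = torch.tensor([[2.0, 2.0, 2.0, 2.0, 0.0]])    # ground-truth box
deltas = transform.get_deltas(src, dst)            # regression targets used by the loss
decoded = transform.apply_deltas(deltas, src)      # should recover the ground-truth box
assert torch.allclose(decoded, dst, atol=1e-4)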
Example #2
    def test_rroi_heads(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg, feature_shape)
        roi_heads = build_roi_heads(cfg, feature_shape)

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        detector_losses.update(proposal_losses)
        expected_losses = {
            "loss_cls": 4.365657806396484,
            "loss_box_reg": 0.0015851043863222003,
            "loss_rpn_cls": 0.2427729219198227,
            "loss_rpn_loc": 0.3646621108055115,
        }
        succ = all(
            torch.allclose(detector_losses[name],
                           torch.tensor(expected_losses.get(name, 0.0)))
            for name in detector_losses.keys())
        self.assertTrue(
            succ,
            "Losses have changed! New losses: {}".format(
                {k: v.item()
                 for k, v in detector_losses.items()}),
        )
Example #3
 def compute_iou_dt_gt(self, dt, gt, is_crowd):
     if self.is_rotated(dt) or self.is_rotated(gt):
         # TODO: take is_crowd into consideration
         assert all(c == 0 for c in is_crowd)
         dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
         gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
         return pairwise_iou_rotated(dt, gt)
     else:
         # This is the same as the classical COCO evaluation
         return maskUtils.iou(dt, gt, is_crowd)
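
compute_iou_dt_gt falls back to the classical COCO IoU for axis-aligned boxes and switches to pairwise_iou_rotated otherwise. A standalone, hedged sketch of the rotated IoU call (assuming pairwise_iou_rotated is the RotatedBoxes variant exported by detectron2.structures, as used in these snippets; values are illustrative):

# Hedged sketch: pairwise rotated IoU between two small sets of boxes.
import torch
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated

dt = RotatedBoxes(torch.tensor([[10.0, 10.0, 4.0, 2.0, 0.0]]))
gt = RotatedBoxes(torch.tensor([[10.0, 10.0, 4.0, 2.0, 0.0],
                                [10.0, 10.0, 4.0, 2.0, 90.0]]))
ious = pairwise_iou_rotated(dt, gt)  # tensor of shape (len(dt), len(gt)) = (1, 2)
print(ious)  # the first entry is 1.0 (identical boxes); the second is lower due to the rotation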
Example #4
    def test_rroi_heads(self):
        torch.manual_seed(121)
        cfg = get_cfg()
        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]],
                                 dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]],
                                 dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = build_proposal_generator(cfg,
                                                      backbone.output_shape())
        roi_heads = build_roi_heads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals,
                                           gt_instances)

        expected_losses = {
            "loss_cls": torch.tensor(4.381618499755859),
            "loss_box_reg": torch.tensor(0.0011829272843897343),
        }
        for name in expected_losses.keys():
            err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
                name, detector_losses[name], expected_losses[name])
            self.assertTrue(
                torch.allclose(detector_losses[name], expected_losses[name]),
                err_msg)
Example #5
def cgrcnn_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    depth = np.load(dataset_dict["file_name"]).astype(np.float32)
    inst = Instances(depth.shape)
    depth = torch.from_numpy(np.tile(depth, (3, 1, 1)))

    grasps = dataset_dict["annotations"]
    gt_boxes, gt_tilts, gt_z, gt_metric = None, None, None, None
    for grasp in grasps:
        box, z, tilt, metric = np.array(grasp["bbox"]), np.array(
            grasp["z"]), np.array(grasp["tilt"]), np.array(grasp["metric"])
        if gt_boxes is None:
            gt_boxes, gt_tilts, gt_z, gt_metric = box, tilt, z, metric
        else:
            gt_boxes = np.vstack((gt_boxes, box))
            gt_tilts = np.hstack((gt_tilts, tilt))
            gt_z = np.hstack((gt_z, z))
            gt_metric = np.hstack((gt_metric, metric))

    inst.gt_boxes = RotatedBoxes(
        torch.from_numpy(gt_boxes.astype(np.float32).reshape(-1, 5)))
    # inst.gt_tilts = torch.from_numpy(gt_tilts.astype(np.float32))
    # inst.gt_z = torch.from_numpy(gt_z.astype(np.float32))
    # inst.gt_metric = torch.from_numpy(gt_metric.astype(np.float32))
    inst.gt_classes = torch.ones(gt_boxes.shape[0], dtype=torch.int64)

    return {"image": depth, "instances": inst}
Example #6
    def label_and_sample_proposals(self, proposals, targets):
        """
        Prepare some proposals to be used to train the RROI heads.
        It performs box matching between `proposals` and `targets`, and assigns
        training labels to the proposals.
        It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes,
        with a fraction of positives that is no larger than `self.positive_sample_fraction`.

        Args:
            See :meth:`StandardROIHeads.forward`

        Returns:
            list[Instances]: length `N` list of `Instances`s containing the proposals
                sampled for training. Each `Instances` has the following fields:
                - proposal_boxes: the rotated proposal boxes
                - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to
                  (this is only meaningful if the proposal has a label > 0; if label = 0
                   then the ground-truth box is random)
                - gt_classes: the ground-truth classification label for each proposal
        """
        gt_boxes = [x.gt_boxes for x in targets]
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou_rotated(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
            )
            matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, targets_per_image.gt_classes
            )

            proposals_per_image = proposals_per_image[sampled_idxs]
            proposals_per_image.gt_classes = gt_classes

            if has_gt:
                sampled_targets = matched_idxs[sampled_idxs]
                proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets]
            else:
                gt_boxes = RotatedBoxes(
                    targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 5))
                )
                proposals_per_image.gt_boxes = gt_boxes

            num_bg_samples.append((gt_classes == self.num_classes).sum().item())
            num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
            proposals_with_gt.append(proposals_per_image)

        # Log the number of fg/bg samples that are selected for training ROI heads
        storage = get_event_storage()
        storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
        storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

        return proposals_with_gt
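
The matching step above is driven by self.proposal_matcher, a detectron2 Matcher applied to the rotated IoU matrix; _sample_proposals then subsamples the matched proposals. A hedged, standalone sketch of just the matching step (the Matcher thresholds and box values below are illustrative, not the values used by RROIHeads):

# Hedged sketch: assigning rotated proposals to ground truth by IoU.
import torch
from detectron2.modeling.matcher import Matcher
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated

gt = RotatedBoxes(torch.tensor([[5.0, 5.0, 4.0, 2.0, 30.0]]))
proposals = RotatedBoxes(torch.tensor([[5.0, 5.0, 4.0, 2.0, 25.0],
                                       [20.0, 20.0, 3.0, 3.0, 0.0]]))
matcher = Matcher(thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=True)

match_quality_matrix = pairwise_iou_rotated(gt, proposals)  # (num_gt, num_proposals)
matched_idxs, matched_labels = matcher(match_quality_matrix)
# matched_idxs[i] is the ground-truth index assigned to proposal i;
# matched_labels[i] is 1 (foreground), 0 (background), or -1 (ignored).
print(matched_idxs, matched_labels)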
Example #7
    def convert_outputs(self, batched_inputs, inputs, results):
        image_sizes = inputs["image_sizes"]
        m_results = [Instances(image_size) for image_size in image_sizes]

        pred_boxes = results["pred_boxes"]
        scores = results["scores"]
        pred_classes = results["pred_classes"].to(torch.int64)
        batch_splits = results["batch_splits"].to(torch.int64).cpu()
        pred_masks = results.get("pred_masks", None)
        if pred_boxes.shape[1] == 5:
            pred_boxes = RotatedBoxes(pred_boxes)
        else:
            pred_boxes = Boxes(pred_boxes)

        offset = 0
        for i in range(len(batched_inputs)):
            next_offset = offset + batch_splits[i]
            m_results[i].pred_boxes = pred_boxes[offset:next_offset]
            m_results[i].scores = scores[offset:next_offset]
            m_results[i].pred_classes = pred_classes[offset:next_offset]
            if "pred_masks" in results:
                num_masks = batch_splits[i]
                indices = torch.arange(num_masks, device=pred_classes.device)
                m_results[i].pred_masks = \
                    pred_masks[offset:next_offset][indices, m_results[i].pred_classes][:, None]
            offset = next_offset

        return meta_arch.GeneralizedRCNN._postprocess(m_results,
                                                      batched_inputs,
                                                      image_sizes)
Example #8
def dota_annotations_to_instances(annos, image_size):

    target = Instances(image_size)

    obb_boxes = [obj["boxes"] for obj in annos]
    obb_boxes = target.gt_boxes = RotatedBoxes(obb_boxes)
    obb_boxes.clip(image_size)

    pt_hbb, pt_inbox, polygons = [], [], []

    rotate_boxes = obb_boxes.tensor.numpy()
    data = [convRotaToPolyAndHbb(rotate_box) for rotate_box in rotate_boxes]
    for d in data:
        pt_hbb.append(d[0])
        pt_inbox.append(d[1])
        polygons.append(d[2])

    target.gt_pt_inbox_boxes = Boxes(pt_inbox)

    target.gt_pt_hbb_boxes = Boxes(pt_hbb)

    classes = [obj["category_id"] + 1 for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    masks = PolygonMasks(polygons)
    target.gt_masks = masks

    if len(target) > 2000:
        mask = random.sample(list(range(0, len(target))), 2000)
        target = target[mask]

    return target
Example #9
    def _create_proposals_from_boxes(self, boxes, image_sizes):
        """
        Args:
            boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 5
            image_sizes (list[tuple]): list of image shapes in (h, w)

        Returns:
            list[Instances]: per-image proposals with the given boxes.
        """
        # Just like RPN, the proposals should not have gradients
        boxes = [RotatedBoxes(b.detach()) for b in boxes]
        proposals = []
        for boxes_per_image, image_size in zip(boxes, image_sizes):
            boxes_per_image.clip(image_size)
            if self.training:
                # do not filter empty boxes at inference time,
                # because the scores from each stage need to be aligned and added later
                boxes_per_image = boxes_per_image[boxes_per_image.nonempty()]
                if (not boxes_per_image.nonempty().all()):
                    print("create_proposals")
                    print(boxes_per_image)
            prop = Instances(image_size)
            prop.proposal_boxes = boxes_per_image
            proposals.append(prop)
        return proposals
Example #10
    def select_over_all_levels(self, bboxlist, scorelist, cls_list,
                               image_sizes):
        # num_images = len(image_sizes)
        results = []
        for i, (boxes, labels,
                scores) in enumerate(zip(bboxlist, cls_list, scorelist)):
            # skip the background

            keep = batched_nms_rotated(boxes, scores, labels, self.nms_thresh)

            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]

            number_of_detections = boxes.size(0)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = scores.clone()
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                boxes = boxes[keep]
                scores = scores[keep]
                labels = labels[keep]

            result = Instances(image_sizes[i])
            result.pred_boxes = RotatedBoxes(boxes)
            result.scores = scores
            result.pred_classes = labels
            results.append(result)
        return results
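
select_over_all_levels suppresses overlapping rotated detections with class-aware rotated NMS. A hedged, standalone sketch of that call (assuming batched_nms_rotated from detectron2.layers, as imported by these snippets; values are illustrative):

# Hedged sketch: per-class rotated NMS on a handful of detections.
import torch
from detectron2.layers import batched_nms_rotated

boxes = torch.tensor([[10.0, 10.0, 6.0, 3.0, 0.0],
                      [10.0, 10.0, 6.0, 3.0, 5.0],    # heavily overlaps the first box
                      [30.0, 30.0, 6.0, 3.0, 45.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
classes = torch.tensor([0, 0, 1])  # NMS is applied independently per class
keep = batched_nms_rotated(boxes, scores, classes, 0.5)  # IoU threshold 0.5
print(keep)  # indices of the kept detections, sorted by decreasing score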
Example #11
def annotations_to_instances_rotated(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Compared to `annotations_to_instances`, this function is for rotated boxes only

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            Containing fields "gt_boxes", "gt_classes",
            if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [obj["bbox"] for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    return target
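
A minimal usage sketch for the function above (the annotation dicts are made up; each "bbox" is in detectron2's (cx, cy, w, h, angle) format with the angle in degrees):

# Hedged sketch: building rotated ground-truth Instances from annotation dicts.
annos = [
    {"bbox": [50.0, 40.0, 20.0, 10.0, 30.0], "category_id": 0},
    {"bbox": [80.0, 60.0, 15.0, 25.0, -45.0], "category_id": 2},
]
instances = annotations_to_instances_rotated(annos, image_size=(100, 120))
print(instances.gt_boxes)    # RotatedBoxes holding the two (clipped) boxes
print(instances.gt_classes)  # tensor([0, 2])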
Example #12
    def test_fast_rcnn_rotated(self):
        torch.manual_seed(132)
        cfg = get_cfg()
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        box2box_transform = Box2BoxTransformRotated(
            weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

        box_head_output_size = 8
        num_classes = 5
        cls_agnostic_bbox_reg = False

        box_predictor = FastRCNNOutputLayers(box_head_output_size,
                                             num_classes,
                                             cls_agnostic_bbox_reg,
                                             box_dim=5)
        feature_pooled = torch.rand(2, box_head_output_size)
        pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled)
        image_shape = (10, 10)
        proposal_boxes = torch.tensor(
            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]],
            dtype=torch.float32)
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                                dtype=torch.float32)
        result = Instances(image_shape)
        result.proposal_boxes = RotatedBoxes(proposal_boxes)
        result.gt_boxes = RotatedBoxes(gt_boxes)
        result.gt_classes = torch.tensor([1, 2])
        proposals = []
        proposals.append(result)
        smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA

        outputs = FastRCNNOutputs(box2box_transform, pred_class_logits,
                                  pred_proposal_deltas, proposals,
                                  smooth_l1_beta)
        with EventStorage():  # capture events in a new storage to discard them
            losses = outputs.losses()

        # Note: the expected losses are slightly different even if
        # the boxes are essentially the same as in the FastRCNNOutput test, because
        # bbox_pred in FastRCNNOutputLayers has different Linear layers/initialization
        # between the two cases.
        expected_losses = {
            "loss_cls": torch.tensor(1.7920907736),
            "loss_box_reg": torch.tensor(4.0410838127),
        }
        for name in expected_losses.keys():
            assert torch.allclose(losses[name], expected_losses[name])
Example #13
    def __init__(
        self,
        box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        smooth_l1_beta=0,
    ):
        """
        Args:
            box2box_transform (Box2BoxTransform/Box2BoxTransformRotated):
                box2box transform instance for proposal-to-detection transformations.
            pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class
                logits for all R predicted object instances.
                Each row corresponds to a predicted object instance.
            pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for
                class-specific or class-agnostic regression. It stores the predicted deltas that
                transform proposals into final box detections.
                B is the box dimension (4 or 5).
                When B is 4, each row is [dx, dy, dw, dh (, ....)].
                When B is 5, each row is [dx, dy, dw, dh, da (, ....)].
            proposals (list[Instances]): A list of N Instances, where Instances i stores the
                proposals for image i, in the field "proposal_boxes".
                When training, each Instances must have ground-truth labels
                stored in the field "gt_classes" and "gt_boxes".
                The total number of all instances must be equal to R.
            smooth_l1_beta (float): The transition point between L1 and L2 loss in
                the smooth L1 loss function. When set to 0, the loss becomes L1. When
                set to +inf, the loss becomes constant 0.
        """
        self.box2box_transform = box2box_transform
        self.num_preds_per_image = [len(p) for p in proposals]
        self.pred_class_logits = pred_class_logits
        self.pred_proposal_deltas = pred_proposal_deltas
        self.smooth_l1_beta = smooth_l1_beta
        self.image_shapes = [x.image_size for x in proposals]

        if len(proposals):
            box_type = type(proposals[0].proposal_boxes)
            # cat(..., dim=0) concatenates over all images in the batch
            self.proposals = box_type.cat(
                [p.proposal_boxes for p in proposals])
            assert (not self.proposals.tensor.requires_grad
                    ), "Proposals should not require gradients!"

            # The following fields should exist only when training.
            if proposals[0].has("gt_boxes"):
                self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
                assert proposals[0].has("gt_classes")
                self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)
        else:
            if self.pred_proposal_deltas.shape[1] == 4:
                self.proposals = Boxes(
                    torch.zeros(0, 4, device=self.pred_proposal_deltas.device))
            else:
                self.proposals = RotatedBoxes(
                    torch.zeros(0, 5, device=self.pred_proposal_deltas.device))
        self._no_instances = self.pred_proposal_deltas.size(
            0) == 0  # no instances found
Example #14
def dota_annotations_to_instances(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """

    target = Instances(image_size)

    obb_boxes = [obj["boxes"] for obj in annos]
    obb_boxes = target.gt_boxes = RotatedBoxes(obb_boxes)
    # obb_boxes.clip(image_size)

    pt_hbb, pt_inbox, polygons = [], [], []

    rotate_boxes = obb_boxes.tensor.numpy()
    data = [convRotaToPolyAndHbb(rotate_box) for rotate_box in rotate_boxes]
    for d in data:
        pt_hbb.append(d[0])
        pt_inbox.append(d[1])
        polygons.append(d[2])

    pt_inbox = torch.as_tensor(pt_inbox).to(dtype=torch.float)
    target.gt_pt_inbox_boxes = Boxes(pt_inbox)

    pt_hbb = torch.as_tensor(pt_hbb).to(dtype=torch.float)
    target.gt_pt_hbb_boxes = Boxes(pt_hbb)

    # for sigmoid_focal_loss_jit the category id should start with 0
    # for SigmoidFocalLoss in layers the category id should start with 1
    classes = [obj["category_id"] + 1 for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    # masks = PolygonMasks(polygons)
    masks_areas = target.gt_pt_hbb_boxes.area()
    # masks = torch.as_tensor(masks.polygons).to(dtype=torch.float)
    # target.gt_poly = masks.view(-1, 8)
    target.gt_areas = masks_areas.to(dtype=torch.float)

    if len(target) > 1000:
        mask = random.sample(list(range(0, len(target))), 1000)
        target = target[mask]

    return target
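
dota_annotations_to_instances depends on convRotaToPolyAndHbb (not shown in these examples) to derive a corner polygon and a horizontal box from each rotated box. As a hedged illustration of that kind of conversion only (not the actual helper), using detectron2's angle convention (degrees, counter-clockwise, y pointing down):

# Hedged sketch: corners and enclosing horizontal box of one rotated box.
import math
import numpy as np

def rotated_box_to_corners_and_hbb(box):
    cx, cy, w, h, angle = box
    theta = math.radians(angle)
    c, s = math.cos(theta), math.sin(theta)
    # corner offsets from the center before rotation
    offsets = [(-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2), (-w / 2, h / 2)]
    corners = np.array([(cx + c * dx + s * dy, cy - s * dx + c * dy) for dx, dy in offsets])
    hbb = [corners[:, 0].min(), corners[:, 1].min(), corners[:, 0].max(), corners[:, 1].max()]
    return corners, hbb

corners, hbb = rotated_box_to_corners_and_hbb([50.0, 40.0, 20.0, 10.0, 30.0])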
Example #15
 def computeIoU(self, imgId, catId):
     p = self.params
     if p.useCats:
         gt = self._gts[imgId, catId]
         dt = self._dts[imgId, catId]
     else:
         gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
         dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
     if len(gt) == 0 and len(dt) == 0:
         return []
     inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
     dt = [dt[i] for i in inds]
     if len(dt) > p.maxDets[-1]:
         dt = dt[0:p.maxDets[-1]]
     ious = np.zeros((len(dt), len(gt)))
     for j, g in enumerate(gt):
         for i, d in enumerate(dt):
             # create bounds for ignore regions(double the gt bbox)
             gt_rotated_box = RotatedBoxes(
                 torch.tensor(g['bbox'], dtype=torch.float).view(-1, 5))
             dt_rotated_box = RotatedBoxes(
                 torch.tensor(d['bbox'], dtype=torch.float).view(-1, 5))
             ious[i, j] = pairwise_iou_rotated(gt_rotated_box,
                                               dt_rotated_box)
             del gt_rotated_box, dt_rotated_box
     # if p.iouType == 'segm':
     #     g = [g['segmentation'] for g in gt]
     #     d = [d['segmentation'] for d in dt]
     # elif p.iouType == 'bbox':
     #     g = [g['bbox'] for g in gt]
     #     d = [d['bbox'] for d in dt]
     # else:
     #     raise Exception('unknown iouType for iou computation')
     #
     # # compute iou between each dt and gt region
     # iscrowd = [int(o['iscrowd']) for o in gt]
     # ious = maskUtils.iou(d,g,iscrowd)
     del gt, dt
     return ious
Example #16
    def draw_dataset_dict(self, dic):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            if annos[0]["bbox_mode"] == BoxMode.XYWHA_ABS:
                boxes = RotatedBoxes(
                    torch.stack([torch.as_tensor(x["bbox"]) for x in annos]))
            else:
                boxes = [
                    BoxMode.convert(x["bbox"], x["bbox_mode"],
                                    BoxMode.XYXY_ABS) for x in annos
                ]

            labels = [x["category_id"] for x in annos]
            names = self.metadata.get("thing_classes", None)
            if names:
                labels = [names[i] for i in labels]
            labels = [
                "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
                for i, a in zip(labels, annos)
            ]
            self.overlay_instances(labels=labels,
                                   boxes=boxes,
                                   masks=masks,
                                   keypoints=keypts)

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            sem_seg = cv2.imread(dic["sem_seg_file_name"],
                                 cv2.IMREAD_GRAYSCALE)
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
        return self.output
Example #17
    def forward(self, features):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[RotatedBoxes]: a list of RotatedBoxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [RotatedBoxes(x) for x in anchors_over_all_feature_maps]
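
The anchors returned above are plain RotatedBoxes objects. A short, hedged sketch of the RotatedBoxes operations these snippets rely on (values are illustrative):

# Hedged sketch: basic RotatedBoxes operations.
import torch
from detectron2.structures import RotatedBoxes

boxes = RotatedBoxes(torch.tensor([[8.0, 6.0, 4.0, 2.0, 15.0],
                                   [30.0, 30.0, 0.0, 3.0, 0.0]]))  # second box has zero width
print(boxes.area())      # tensor([8., 0.])
print(boxes.nonempty())  # tensor([ True, False])
boxes.clip((20, 20))     # clips nearly-horizontal boxes to an (h, w) = (20, 20) image
print(boxes[boxes.nonempty()])  # keep only the non-empty boxes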
Example #18
    def _test_roialignv2_roialignrotated_match(self, device):
        pooler_resolution = 14
        canonical_level = 4
        canonical_scale_factor = 2**canonical_level
        pooler_scales = (1.0 / canonical_scale_factor, )
        sampling_ratio = 0

        N, C, H, W = 2, 4, 10, 8
        N_rois = 10
        std = 11
        mean = 0
        feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean

        features = [feature.to(device)]

        rois = []
        rois_rotated = []
        for _ in range(N):
            boxes = self._rand_boxes(num_boxes=N_rois,
                                     x_max=W * canonical_scale_factor,
                                     y_max=H * canonical_scale_factor)

            rotated_boxes = torch.zeros(N_rois, 5)
            rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
            rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
            rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            rois.append(Boxes(boxes).to(device))
            rois_rotated.append(RotatedBoxes(rotated_boxes).to(device))

        roialignv2_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type="ROIAlignV2",
        )

        roialignv2_out = roialignv2_pooler(features, rois)

        roialignrotated_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type="ROIAlignRotated",
        )

        roialignrotated_out = roialignrotated_pooler(features, rois_rotated)

        self.assertTrue(
            torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4))
Example #19
    def label_and_sample_anchors(
        self, anchors: List[RotatedBoxes], gt_instances: List[Instances]
    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        """
        Args:
            anchors (list[RotatedBoxes]): anchors for each feature map.
            gt_instances: the ground-truth instances for each image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across feature maps. Label values are in {-1, 0, 1},
                with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            list[Tensor]:
                i-th element is a Nx5 tensor, where N is the total number of anchors across
                feature maps.  The values are the matched gt boxes for each anchor.
                Values are undefined for those anchors not labeled as 1.
        """
        anchors = RotatedBoxes.cat(anchors)

        gt_boxes = [x.gt_boxes for x in gt_instances]
        del gt_instances

        gt_labels = []
        matched_gt_boxes = []
        for gt_boxes_i in gt_boxes:
            """
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(
                gt_boxes_i, anchors)
            matched_idxs, gt_labels_i = retry_if_cuda_oom(
                self.anchor_matcher)(match_quality_matrix)
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)

            # A vector of labels (-1, 0, 1) for each anchor
            gt_labels_i = self._subsample_labels(gt_labels_i)

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            else:
                # TODO wasted indexing computation for ignored boxes
                matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

            gt_labels.append(gt_labels_i)  # N,AHW
            matched_gt_boxes.append(matched_gt_boxes_i)
        return gt_labels, matched_gt_boxes
Example #20
def grasp_fast_rcnn_inference_single_image_rotated(scores, boxes, tilts, zs,
                                                   image_shape, score_thresh,
                                                   nms_thresh, topk_per_image):
    """
    Single-image inference. Return rotated bounding-box detection results by thresholding
    on scores and applying rotated non-maximum suppression (Rotated NMS).
    Args:
        Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes
        per image.
    Returns:
        Same as `fast_rcnn_inference_rotated`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1) & torch.isfinite(tilts).all(dim=1) & torch.isfinite(zs).all(
            dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        tilts = tilts[valid_mask]
        zs = zs[valid_mask]

    B = 5  # box dimension
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // B
    # Convert to RotatedBoxes to use the `clip` function ...
    boxes = RotatedBoxes(boxes.reshape(-1, B))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B)  # R x C x B
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    tilts = tilts[filter_inds[:, 0]]
    zs = zs[filter_inds[:, 0]]

    # Apply per-class Rotated NMS
    keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    tilts, zs = tilts[keep], zs[keep]

    result = Instances(image_shape)
    result.pred_boxes = RotatedBoxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_zs = torch.flatten(zs)
    result.pred_tilts = torch.flatten(tilts)

    return result, filter_inds[:, 0]
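
The score-threshold step in the function above keeps, for each prediction, every class whose score clears score_thresh; filter_inds records which (prediction, class) pairs survived, and its first column is reused to gather the matching tilts and zs. A tiny standalone illustration of that indexing pattern:

# Hedged sketch: score thresholding with a boolean mask and nonzero().
import torch

scores = torch.tensor([[0.9, 0.2],
                       [0.4, 0.7]])      # R x K per-class scores
filter_mask = scores > 0.5               # R x K boolean mask
filter_inds = filter_mask.nonzero()      # each row is (prediction index, class index)
print(filter_inds)          # tensor([[0, 0], [1, 1]])
print(scores[filter_mask])  # tensor([0.9000, 0.7000])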
Example #21
 def __call__(self, depth_, inst_):
     depth, inst = copy.deepcopy(depth_), copy.deepcopy(inst_)
     
     if np.random.uniform(0, 1) < self.prob:
         depth = np.fliplr(depth)
         
         rbbxs = inst.gt_boxes.tensor.cpu().numpy()
         rbbxs[:, 0] = self.w - rbbxs[:, 0]
         rbbxs[:, 4] = -rbbxs[:, 4]
         rbbxs = rbbxs.reshape(-1, 5)
         gt_boxes = torch.tensor(rbbxs, dtype=torch.float32)
         inst.gt_boxes = RotatedBoxes(gt_boxes)
         inst.gt_tilts = -inst.gt_tilts
         
     return depth, inst
Example #22
    def _match_and_label_boxes(self, proposals, stage, targets):
        """
        Match proposals with groundtruth using the matcher at the given stage.
        Label the proposals as foreground or background based on the match.

        Args:
            proposals (list[Instances]): One Instances for each image, with
                the field "proposal_boxes".
            stage (int): the current stage
            targets (list[Instances]): the ground truth instances

        Returns:
            list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
        """
        num_fg_samples, num_bg_samples = [], []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            match_quality_matrix = pairwise_iou_rotated(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
            )
            # proposal_labels are 0 or 1
            matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
            if len(targets_per_image) > 0:
                gt_classes = targets_per_image.gt_classes[matched_idxs]
                # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
                gt_classes[proposal_labels == 0] = self.num_classes
                gt_boxes = targets_per_image.gt_boxes[matched_idxs]
            else:
                gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
                gt_boxes = RotatedBoxes(
                    targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 5))
                )
            proposals_per_image.gt_classes = gt_classes
            proposals_per_image.gt_boxes = gt_boxes

            num_fg_samples.append((proposal_labels == 1).sum().item())
            num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

        # Log the number of fg/bg samples in each stage
        storage = get_event_storage()
        storage.put_scalar(
            "stage{}/roi_head/num_fg_samples".format(stage),
            sum(num_fg_samples) / len(num_fg_samples),
        )
        storage.put_scalar(
            "stage{}/roi_head/num_bg_samples".format(stage),
            sum(num_bg_samples) / len(num_bg_samples),
        )
        return proposals
Example #23
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 5).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        # Concatenate anchors from all feature maps into a single RotatedBoxes per image
        anchors = [RotatedBoxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes,
                                                       anchors, self.gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            # DEBUG
            #assert torch.all(gt_boxes_i.tensor[:,2] > 1e-5)
            #assert torch.all(gt_boxes_i.tensor[:,3] > 1e-5)
            #assert torch.all(anchors_i.tensor[:,2] > 1e-5)
            #assert torch.all(anchors_i.tensor[:,3] > 1e-5)
            match_quality_matrix = pairwise_iou_rotated(gt_boxes_i, anchors_i)
            matched_idxs, gt_objectness_logits_i = self.anchor_matcher(
                match_quality_matrix)

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors_i.inside_box(
                    image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes = gt_boxes_i[matched_idxs]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes.tensor)

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
Example #24
    def test_overlay_rotated_instances(self):
        H, W = 100, 150
        img = np.random.rand(H, W, 3) * 255
        num_boxes = 50
        boxes_5d = torch.zeros(num_boxes, 5)
        boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W)
        boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H)
        boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
        boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
        boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
        rotated_boxes = RotatedBoxes(boxes_5d)
        labels = [str(i) for i in range(num_boxes)]

        v = Visualizer(img, self.metadata)
        output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image()
        self.assertEqual(output.shape, img.shape)
Example #25
    def label_anchors(self, anchors, gt_instances):
        """
        Args:
            anchors (list[Boxes]): A list of #feature level Boxes.
                The Boxes contains anchors of this image on the specific feature level.
            gt_instances (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across all feature maps (sum(Hi * Wi * A)).
                Label values are in {-1, 0, ..., K}, with -1 means ignore, and K means background.
            list[Tensor]:
                i-th element is an Rx5 tensor, where R is the total number of anchors across
                feature maps. The values are the matched gt boxes for each anchor.
                Values are undefined for those anchors not labeled as foreground.
        """
        anchors = RotatedBoxes.cat(anchors)  # Rx5

        gt_labels = []
        matched_gt_boxes = []
        for gt_per_image in gt_instances:
            match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors)
            matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix)
            del match_quality_matrix

            if len(gt_per_image) > 0:
                matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs]

                gt_labels_i = gt_per_image.gt_classes[matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_labels_i[anchor_labels == 0] = self.num_classes
                # Anchors with label -1 are ignored.
                gt_labels_i[anchor_labels == -1] = -1
            else:
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
                gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes

            gt_labels.append(gt_labels_i)
            matched_gt_boxes.append(matched_gt_boxes_i)

        return gt_labels, matched_gt_boxes
Example #26
    def forward(self, features):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[list[Boxes]]: a list of #image elements. Each is a list of #feature level Boxes.
                The Boxes contains anchors of this image on the specific feature level.
        """
        num_images = len(features[0])
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self.grid_anchors(grid_sizes)

        anchors_in_image = []
        for anchors_per_feature_map in anchors_over_all_feature_maps:
            boxes = RotatedBoxes(anchors_per_feature_map)
            anchors_in_image.append(boxes)

        anchors = [copy.deepcopy(anchors_in_image) for _ in range(num_images)]
        return anchors
Example #27
def get_single_instance(depth, rbbxs):
    inst = Instances(depth.shape)
    
    rbox = rbbxs[:, [0, 1, 4, 3, 5]]
    gt_boxes = torch.tensor(rbox, dtype=torch.float32)
    inst.gt_boxes = RotatedBoxes(gt_boxes)
    inst.gt_boxes.clip(depth.shape)
    
    inst.gt_classes = torch.ones(rbbxs.shape[0], dtype=torch.int64)
    
    gt_tilts = rbbxs[:, 6].astype(np.float32)
    inst.gt_tilts = torch.from_numpy(np.deg2rad(gt_tilts))
    
    gt_z = rbbxs[:, 2].astype(np.float32) * 10
    inst.gt_z = torch.from_numpy(gt_z)
    
    gt_metric = rbbxs[:, 8].astype(np.float32)
    inst.gt_metric = torch.from_numpy(gt_metric)
    
    return inst
Example #28
def create_instances(predictions, image_size):
    ret = Instances(image_size)

    score = np.asarray([x["score"] for x in predictions])
    chosen = (score > args.conf_threshold).nonzero()[0]
    score = score[chosen]
    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 5)
    bbox = BoxMode.convert(bbox, BoxMode.XYWHA_ABS, BoxMode.XYWHA_ABS)

    labels = np.asarray(
        [dataset_id_map(predictions[i]["category_id"]) for i in chosen])

    ret.scores = score
    ret.pred_boxes = RotatedBoxes(bbox)
    ret.pred_classes = labels

    try:
        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
    except KeyError:
        pass
    return ret
Example #29
    def convert_outputs(self, batched_inputs, inputs, results):
        output_names = self.get_output_names()
        assert len(results) == len(output_names)

        m_results = {}
        for k, v in results.items():
            assert k in output_names, k
            m_results[k] = v.to(self._ns.device)

        # TensorRT output number is not dynamic
        image_sizes = inputs["image_sizes"]
        m_instances = [Instances(image_size) for image_size in image_sizes]

        # pred_boxes format: (batch_index, x0, y0, x1, y1)
        pred_boxes = m_results["pred_boxes"][:, 1:]
        scores = m_results["scores"]
        pred_classes = m_results["pred_classes"].to(torch.int64)
        batch_splits = m_results["batch_splits"].to(torch.int64).cpu()
        pred_masks = m_results.get("pred_masks", None)
        if pred_boxes.shape[1] == 5:
            pred_boxes = RotatedBoxes(pred_boxes)
        else:
            pred_boxes = Boxes(pred_boxes)

        offset = 0
        for i in range(len(batched_inputs)):
            next_offset = offset + batch_splits[i]
            m_instances[i].pred_boxes = pred_boxes[offset:next_offset]
            m_instances[i].scores = scores[offset:next_offset]
            m_instances[i].pred_classes = pred_classes[offset:next_offset]
            if "pred_masks" in m_results:
                num_masks = batch_splits[i]
                indices = torch.arange(num_masks, device=pred_classes.device)
                m_instances[i].pred_masks = \
                    pred_masks[offset:next_offset][indices, m_instances[i].pred_classes][:, None]
            offset += int(len(pred_boxes) / len(batched_inputs))

        return meta_arch.GeneralizedRCNN._postprocess(m_instances,
                                                      batched_inputs,
                                                      image_sizes)
Example #30
def rotated_annotations_to_instances(annos, image_size):
    boxes = [obj["bbox"] for obj in annos]
    boxes = torch.tensor(boxes, dtype=torch.float)
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes
    #del boxes, classes
    # include component list into target
    # if len(annos) and "component" in annos[0]:
    #     component = []
    #     for obj in annos:
    #           torch.stack
    #         component.append(obj["component"])
    #     # component = np.array(component)
    #
    #     #component = torch.tensor(component, dtype=torch.int8)
    # target.gt_component = np.array(component)
    return target