def test_fast_rcnn_rotated(self):
    torch.manual_seed(132)
    box_head_output_size = 8

    box_predictor = RotatedFastRCNNOutputLayers(
        ShapeSpec(channels=box_head_output_size),
        box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)),
        num_classes=5,
    )
    feature_pooled = torch.rand(2, box_head_output_size)
    predictions = box_predictor(feature_pooled)
    proposal_boxes = torch.tensor(
        [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32
    )
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
    proposal = Instances((10, 10))
    proposal.proposal_boxes = RotatedBoxes(proposal_boxes)
    proposal.gt_boxes = RotatedBoxes(gt_boxes)
    proposal.gt_classes = torch.tensor([1, 2])

    with EventStorage():  # capture events in a new storage to discard them
        losses = box_predictor.losses(predictions, [proposal])

    # Note: the expected losses are slightly different even if the boxes are
    # essentially the same as in the FastRCNNOutput test, because bbox_pred in
    # FastRCNNOutputLayers has different Linear layers/initialization
    # between the two cases.
    expected_losses = {
        "loss_cls": torch.tensor(1.7920907736),
        "loss_box_reg": torch.tensor(4.0410838127),
    }
    for name in expected_losses.keys():
        assert torch.allclose(losses[name], expected_losses[name])
def test_rroi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)

    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, feature_shape)
    roi_heads = build_roi_heads(cfg, feature_shape)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)
        detector_losses.update(proposal_losses)

    expected_losses = {
        "loss_cls": 4.365657806396484,
        "loss_box_reg": 0.0015851043863222003,
        "loss_rpn_cls": 0.2427729219198227,
        "loss_rpn_loc": 0.3646621108055115,
    }
    succ = all(
        torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0)))
        for name in detector_losses.keys()
    )
    self.assertTrue(
        succ,
        "Losses have changed! New losses: {}".format(
            {k: v.item() for k, v in detector_losses.items()}
        ),
    )
def compute_iou_dt_gt(self, dt, gt, is_crowd):
    if self.is_rotated(dt) or self.is_rotated(gt):
        # TODO: take is_crowd into consideration
        assert all(c == 0 for c in is_crowd)
        dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
        gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
        return pairwise_iou_rotated(dt, gt)
    else:
        # This is the same as the classical COCO evaluation
        return maskUtils.iou(dt, gt, is_crowd)
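# --- Hedged standalone sketch (not part of the evaluator above) ---
# A minimal check of the rotated-IoU path: pairwise_iou_rotated takes two
# RotatedBoxes in (cx, cy, w, h, angle-in-degrees) format and returns an
# N x M IoU matrix. The box values here are illustrative only.
import torch
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated

dt = RotatedBoxes(torch.tensor([[2.0, 2.0, 2.0, 2.0, 0.0]]))
gt = RotatedBoxes(torch.tensor([[2.0, 2.0, 2.0, 2.0, 30.0]]))
print(pairwise_iou_rotated(dt, gt))  # 1x1 tensor; < 1.0 because the angles differ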
def test_rroi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    backbone = build_backbone(cfg)

    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    roi_heads = build_roi_heads(cfg, backbone.output_shape())

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    expected_losses = {
        "loss_cls": torch.tensor(4.381618499755859),
        "loss_box_reg": torch.tensor(0.0011829272843897343),
    }
    for name in expected_losses.keys():
        err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
            name, detector_losses[name], expected_losses[name]
        )
        self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name]), err_msg)
def cgrcnn_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    depth = np.load(dataset_dict["file_name"]).astype(np.float32)
    inst = Instances(depth.shape)
    depth = torch.from_numpy(np.tile(depth, (3, 1, 1)))

    grasps = dataset_dict["annotations"]
    gt_boxes, gt_tilts, gt_z, gt_metric = None, None, None, None
    for grasp in grasps:
        box, z, tilt, metric = (
            np.array(grasp["bbox"]),
            np.array(grasp["z"]),
            np.array(grasp["tilt"]),
            np.array(grasp["metric"]),
        )
        if gt_boxes is None:
            gt_boxes, gt_tilts, gt_z, gt_metric = box, tilt, z, metric
        else:
            gt_boxes = np.vstack((gt_boxes, box))
            gt_tilts = np.hstack((gt_tilts, tilt))
            gt_z = np.hstack((gt_z, z))
            gt_metric = np.hstack((gt_metric, metric))

    inst.gt_boxes = RotatedBoxes(
        torch.from_numpy(gt_boxes.astype(np.float32).reshape(-1, 5))
    )
    # inst.gt_tilts = torch.from_numpy(gt_tilts.astype(np.float32))
    # inst.gt_z = torch.from_numpy(gt_z.astype(np.float32))
    # inst.gt_metric = torch.from_numpy(gt_metric.astype(np.float32))
    inst.gt_classes = torch.ones(gt_boxes.shape[0], dtype=torch.int64)
    return {"image": depth, "instances": inst}
def label_and_sample_proposals(self, proposals, targets):
    """
    Prepare some proposals to be used to train the RROI heads.
    It performs box matching between `proposals` and `targets`, and assigns
    training labels to the proposals.
    It returns `self.batch_size_per_image` random samples from proposals and
    ground-truth boxes, with a fraction of positives that is no larger than
    `self.positive_sample_fraction`.

    Args:
        See :meth:`StandardROIHeads.forward`

    Returns:
        list[Instances]: length `N` list of `Instances`s containing the proposals
            sampled for training. Each `Instances` has the following fields:
            - proposal_boxes: the rotated proposal boxes
            - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to
              (this is only meaningful if the proposal has a label > 0; if label = 0
              then the ground-truth box is random)
            - gt_classes: the ground-truth classification label for each proposal
    """
    gt_boxes = [x.gt_boxes for x in targets]
    if self.proposal_append_gt:
        proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

    proposals_with_gt = []

    num_fg_samples = []
    num_bg_samples = []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        has_gt = len(targets_per_image) > 0
        match_quality_matrix = pairwise_iou_rotated(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
        )
        matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix)
        sampled_idxs, gt_classes = self._sample_proposals(
            matched_idxs, matched_labels, targets_per_image.gt_classes
        )

        proposals_per_image = proposals_per_image[sampled_idxs]
        proposals_per_image.gt_classes = gt_classes

        if has_gt:
            sampled_targets = matched_idxs[sampled_idxs]
            proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets]
        else:
            gt_boxes = RotatedBoxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 5))
            )
            proposals_per_image.gt_boxes = gt_boxes

        num_bg_samples.append((gt_classes == self.num_classes).sum().item())
        num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
        proposals_with_gt.append(proposals_per_image)

    # Log the number of fg/bg samples that are selected for training ROI heads
    storage = get_event_storage()
    storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
    storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

    return proposals_with_gt
def convert_outputs(self, batched_inputs, inputs, results):
    image_sizes = inputs["image_sizes"]
    m_results = [Instances(image_size) for image_size in image_sizes]

    pred_boxes = results["pred_boxes"]
    scores = results["scores"]
    pred_classes = results["pred_classes"].to(torch.int64)
    batch_splits = results["batch_splits"].to(torch.int64).cpu()
    pred_masks = results.get("pred_masks", None)

    if pred_boxes.shape[1] == 5:
        pred_boxes = RotatedBoxes(pred_boxes)
    else:
        pred_boxes = Boxes(pred_boxes)

    offset = 0
    for i in range(len(batched_inputs)):
        next_offset = offset + batch_splits[i]
        m_results[i].pred_boxes = pred_boxes[offset:next_offset]
        m_results[i].scores = scores[offset:next_offset]
        m_results[i].pred_classes = pred_classes[offset:next_offset]
        if "pred_masks" in results:
            num_masks = batch_splits[i]
            indices = torch.arange(num_masks, device=pred_classes.device)
            m_results[i].pred_masks = pred_masks[offset:next_offset][
                indices, m_results[i].pred_classes
            ][:, None]
        offset = next_offset

    return meta_arch.GeneralizedRCNN._postprocess(m_results, batched_inputs, image_sizes)
def dota_annotations_to_instances(annos, image_size):
    target = Instances(image_size)
    obb_boxes = [obj["boxes"] for obj in annos]
    obb_boxes = target.gt_boxes = RotatedBoxes(obb_boxes)
    obb_boxes.clip(image_size)

    pt_hbb, pt_inbox, polygons = [], [], []
    rotate_boxes = obb_boxes.tensor.numpy()
    data = [convRotaToPolyAndHbb(rotate_box) for rotate_box in rotate_boxes]
    for d in data:
        pt_hbb.append(d[0])
        pt_inbox.append(d[1])
        polygons.append(d[2])
    target.gt_pt_inbox_boxes = Boxes(pt_inbox)
    target.gt_pt_hbb_boxes = Boxes(pt_hbb)

    classes = [obj["category_id"] + 1 for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    masks = PolygonMasks(polygons)
    target.gt_masks = masks

    if len(target) > 2000:
        mask = random.sample(list(range(0, len(target))), 2000)
        target = target[mask]
    return target
def _create_proposals_from_boxes(self, boxes, image_sizes):
    """
    Args:
        boxes (list[Tensor]): per-image predicted rotated boxes, each of shape Ri x 5
        image_sizes (list[tuple]): list of image shapes in (h, w)

    Returns:
        list[Instances]: per-image proposals with the given boxes.
    """
    # Just like RPN, the proposals should not have gradients
    boxes = [RotatedBoxes(b.detach()) for b in boxes]
    proposals = []
    for boxes_per_image, image_size in zip(boxes, image_sizes):
        boxes_per_image.clip(image_size)
        if self.training:
            # do not filter empty boxes at inference time,
            # because the scores from each stage need to be aligned and added later
            boxes_per_image = boxes_per_image[boxes_per_image.nonempty()]
            if not boxes_per_image.nonempty().all():
                print("create_proposals")
                print(boxes_per_image)
        prop = Instances(image_size)
        prop.proposal_boxes = boxes_per_image
        proposals.append(prop)
    return proposals
def select_over_all_levels(self, bboxlist, scorelist, cls_list, image_sizes):
    # num_images = len(image_sizes)
    results = []
    for i, (boxes, labels, scores) in enumerate(zip(bboxlist, cls_list, scorelist)):
        # skip the background
        keep = batched_nms_rotated(boxes, scores, labels, self.nms_thresh)
        boxes = boxes[keep]
        scores = scores[keep]
        labels = labels[keep]
        number_of_detections = boxes.size(0)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = scores.clone()
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]

        result = Instances(image_sizes[i])
        result.pred_boxes = RotatedBoxes(boxes)
        result.scores = scores
        result.pred_classes = labels
        results.append(result)
    return results
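# --- Hedged standalone sketch of the NMS call used above ---
# batched_nms_rotated from detectron2.layers performs per-class rotated NMS:
# boxes are Nx5 (cx, cy, w, h, angle), and boxes with different labels never
# suppress each other. Values below are illustrative.
import torch
from detectron2.layers import batched_nms_rotated

boxes = torch.tensor([[10.0, 10.0, 8.0, 8.0, 0.0],
                      [10.0, 10.0, 8.0, 8.0, 5.0],
                      [30.0, 30.0, 8.0, 8.0, 0.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
labels = torch.tensor([0, 0, 1])
keep = batched_nms_rotated(boxes, scores, labels, iou_threshold=0.5)
print(keep)  # indices of kept boxes; the second box is suppressed by the first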
def annotations_to_instances_rotated(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Compared to `annotations_to_instances`, this function is for rotated boxes only.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            Containing fields "gt_boxes", "gt_classes",
            if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [obj["bbox"] for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    return target
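# --- Hedged usage sketch for the converter above ---
# A single annotation in (cx, cy, w, h, angle) / XYWHA_ABS format becomes an
# Instances object carrying rotated ground-truth boxes. Values are illustrative.
annos = [{"bbox": [50.0, 40.0, 20.0, 10.0, 30.0], "category_id": 0}]
instances = annotations_to_instances_rotated(annos, image_size=(100, 100))
print(instances.gt_boxes)    # RotatedBoxes with one 5-d box
print(instances.gt_classes)  # tensor([0])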
def test_fast_rcnn_rotated(self):
    torch.manual_seed(132)
    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

    box_head_output_size = 8
    num_classes = 5
    cls_agnostic_bbox_reg = False

    box_predictor = FastRCNNOutputLayers(
        box_head_output_size, num_classes, cls_agnostic_bbox_reg, box_dim=5
    )
    feature_pooled = torch.rand(2, box_head_output_size)
    pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled)
    image_shape = (10, 10)

    proposal_boxes = torch.tensor(
        [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32
    )
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
    result = Instances(image_shape)
    result.proposal_boxes = RotatedBoxes(proposal_boxes)
    result.gt_boxes = RotatedBoxes(gt_boxes)
    result.gt_classes = torch.tensor([1, 2])
    proposals = []
    proposals.append(result)

    smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA
    outputs = FastRCNNOutputs(
        box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, smooth_l1_beta
    )
    with EventStorage():  # capture events in a new storage to discard them
        losses = outputs.losses()

    # Note: the expected losses are slightly different even if the boxes are
    # essentially the same as in the FastRCNNOutput test, because bbox_pred in
    # FastRCNNOutputLayers has different Linear layers/initialization
    # between the two cases.
    expected_losses = {
        "loss_cls": torch.tensor(1.7920907736),
        "loss_box_reg": torch.tensor(4.0410838127),
    }
    for name in expected_losses.keys():
        assert torch.allclose(losses[name], expected_losses[name])
def __init__(
    self,
    box2box_transform,
    pred_class_logits,
    pred_proposal_deltas,
    proposals,
    smooth_l1_beta=0,
):
    """
    Args:
        box2box_transform (Box2BoxTransform/Box2BoxTransformRotated):
            box2box transform instance for proposal-to-detection transformations.
        pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted
            class logits for all R predicted object instances.
            Each row corresponds to a predicted object instance.
        pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for
            class-specific or class-agnostic regression. It stores the predicted deltas
            that transform proposals into final box detections.
            B is the box dimension (4 or 5).
            When B is 4, each row is [dx, dy, dw, dh (, ....)].
            When B is 5, each row is [dx, dy, dw, dh, da (, ....)].
        proposals (list[Instances]): A list of N Instances, where Instances i stores the
            proposals for image i, in the field "proposal_boxes".
            When training, each Instances must have ground-truth labels
            stored in the fields "gt_classes" and "gt_boxes".
            The total number of all instances must be equal to R.
        smooth_l1_beta (float): The transition point between L1 and L2 loss in
            the smooth L1 loss function. When set to 0, the loss becomes L1. When
            set to +inf, the loss becomes constant 0.
    """
    self.box2box_transform = box2box_transform
    self.num_preds_per_image = [len(p) for p in proposals]
    self.pred_class_logits = pred_class_logits
    self.pred_proposal_deltas = pred_proposal_deltas
    self.smooth_l1_beta = smooth_l1_beta
    self.image_shapes = [x.image_size for x in proposals]

    if len(proposals):
        box_type = type(proposals[0].proposal_boxes)
        # cat(..., dim=0) concatenates over all images in the batch
        self.proposals = box_type.cat([p.proposal_boxes for p in proposals])
        assert (
            not self.proposals.tensor.requires_grad
        ), "Proposals should not require gradients!"

        # The following fields should exist only when training.
        if proposals[0].has("gt_boxes"):
            self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
            assert proposals[0].has("gt_classes")
            self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)
    else:
        if self.pred_proposal_deltas.shape[1] == 4:
            self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device))
        else:
            self.proposals = RotatedBoxes(
                torch.zeros(0, 5, device=self.pred_proposal_deltas.device)
            )
    self._no_instances = self.pred_proposal_deltas.size(0) == 0  # no instances found
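# --- Hedged standalone sketch of the 5-d delta encoding described above ---
# Box2BoxTransformRotated encodes a proposal-to-target transform as
# (dx, dy, dw, dh, da); apply_deltas inverts get_deltas up to numerical error.
# Box values are illustrative.
import torch
from detectron2.modeling.box_regression import Box2BoxTransformRotated

transform = Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1))
src = torch.tensor([[2.0, 1.95, 2.4, 1.7, 0.0]])  # proposal (cx, cy, w, h, angle)
dst = torch.tensor([[2.0, 2.0, 2.0, 2.0, 0.0]])   # target box
deltas = transform.get_deltas(src, dst)           # shape 1x5
print(transform.apply_deltas(deltas, src))        # approximately dst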
def dota_annotations_to_instances(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    target = Instances(image_size)
    obb_boxes = [obj["boxes"] for obj in annos]
    obb_boxes = target.gt_boxes = RotatedBoxes(obb_boxes)
    # obb_boxes.clip(image_size)

    pt_hbb, pt_inbox, polygons = [], [], []
    rotate_boxes = obb_boxes.tensor.numpy()
    data = [convRotaToPolyAndHbb(rotate_box) for rotate_box in rotate_boxes]
    for d in data:
        pt_hbb.append(d[0])
        pt_inbox.append(d[1])
        polygons.append(d[2])
    pt_inbox = torch.as_tensor(pt_inbox).to(dtype=torch.float)
    target.gt_pt_inbox_boxes = Boxes(pt_inbox)
    pt_hbb = torch.as_tensor(pt_hbb).to(dtype=torch.float)
    target.gt_pt_hbb_boxes = Boxes(pt_hbb)

    # for sigmoid_focal_loss_jit the category id should start with 0
    # for SigmoidFocalLoss in layers the category id should start with 1
    classes = [obj["category_id"] + 1 for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    # masks = PolygonMasks(polygons)
    masks_areas = target.gt_pt_hbb_boxes.area()
    # masks = torch.as_tensor(masks.polygons).to(dtype=torch.float)
    # target.gt_poly = masks.view(-1, 8)
    target.gt_areas = masks_areas.to(dtype=torch.float)

    if len(target) > 1000:
        mask = random.sample(list(range(0, len(target))), 1000)
        target = target[mask]
    return target
def computeIoU(self, imgId, catId):
    p = self.params
    if p.useCats:
        gt = self._gts[imgId, catId]
        dt = self._dts[imgId, catId]
    else:
        gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
        dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
    if len(gt) == 0 and len(dt) == 0:
        return []
    inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
    dt = [dt[i] for i in inds]
    if len(dt) > p.maxDets[-1]:
        dt = dt[0:p.maxDets[-1]]

    # Compute rotated IoU for each (dt, gt) pair; boxes are (cx, cy, w, h, angle)
    ious = np.zeros((len(dt), len(gt)))
    for j, g in enumerate(gt):
        for i, d in enumerate(dt):
            gt_rotated_box = RotatedBoxes(
                torch.tensor(g['bbox'], dtype=torch.float).view(-1, 5)
            )
            dt_rotated_box = RotatedBoxes(
                torch.tensor(d['bbox'], dtype=torch.float).view(-1, 5)
            )
            ious[i, j] = pairwise_iou_rotated(gt_rotated_box, dt_rotated_box)
            del gt_rotated_box, dt_rotated_box

    # The classical (axis-aligned) COCO IoU computation this replaces:
    # if p.iouType == 'segm':
    #     g = [g['segmentation'] for g in gt]
    #     d = [d['segmentation'] for d in dt]
    # elif p.iouType == 'bbox':
    #     g = [g['bbox'] for g in gt]
    #     d = [d['bbox'] for d in dt]
    # else:
    #     raise Exception('unknown iouType for iou computation')
    #
    # # compute iou between each dt and gt region
    # iscrowd = [int(o['iscrowd']) for o in gt]
    # ious = maskUtils.iou(d, g, iscrowd)
    del gt, dt
    return ious
def draw_dataset_dict(self, dic):
    """
    Draw annotations/segmentations in Detectron2 Dataset format.

    Args:
        dic (dict): annotation/segmentation data of one image, in Detectron2
            Dataset format.

    Returns:
        output (VisImage): image object with visualizations.
    """
    annos = dic.get("annotations", None)
    if annos:
        if "segmentation" in annos[0]:
            masks = [x["segmentation"] for x in annos]
        else:
            masks = None
        if "keypoints" in annos[0]:
            keypts = [x["keypoints"] for x in annos]
            keypts = np.array(keypts).reshape(len(annos), -1, 3)
        else:
            keypts = None

        if annos[0]["bbox_mode"] == BoxMode.XYWHA_ABS:
            boxes = RotatedBoxes(
                torch.stack([torch.as_tensor(x["bbox"]) for x in annos])
            )
        else:
            boxes = [
                BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
                for x in annos
            ]

        labels = [x["category_id"] for x in annos]
        names = self.metadata.get("thing_classes", None)
        if names:
            labels = [names[i] for i in labels]
        labels = [
            "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
            for i, a in zip(labels, annos)
        ]
        self.overlay_instances(labels=labels, boxes=boxes, masks=masks, keypoints=keypts)

    sem_seg = dic.get("sem_seg", None)
    if sem_seg is None and "sem_seg_file_name" in dic:
        sem_seg = cv2.imread(dic["sem_seg_file_name"], cv2.IMREAD_GRAYSCALE)
    if sem_seg is not None:
        self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
    return self.output
def forward(self, features):
    """
    Args:
        features (list[Tensor]): list of backbone feature maps on which to generate anchors.

    Returns:
        list[RotatedBoxes]: a list of RotatedBoxes containing all the anchors for each
            feature map (i.e. the cell anchors repeated over all locations in the
            feature map). The number of anchors of each feature map is
            Hi x Wi x num_cell_anchors, where Hi, Wi are resolution of the feature map
            divided by anchor stride.
    """
    grid_sizes = [feature_map.shape[-2:] for feature_map in features]
    anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
    return [RotatedBoxes(x) for x in anchors_over_all_feature_maps]
def _test_roialignv2_roialignrotated_match(self, device):
    pooler_resolution = 14
    canonical_level = 4
    canonical_scale_factor = 2 ** canonical_level
    pooler_scales = (1.0 / canonical_scale_factor,)
    sampling_ratio = 0

    N, C, H, W = 2, 4, 10, 8
    N_rois = 10
    std = 11
    mean = 0
    feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean

    features = [feature.to(device)]

    rois = []
    rois_rotated = []
    for _ in range(N):
        boxes = self._rand_boxes(
            num_boxes=N_rois,
            x_max=W * canonical_scale_factor,
            y_max=H * canonical_scale_factor,
        )

        rotated_boxes = torch.zeros(N_rois, 5)
        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        rois.append(Boxes(boxes).to(device))
        rois_rotated.append(RotatedBoxes(rotated_boxes).to(device))

    roialignv2_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type="ROIAlignV2",
    )
    roialignv2_out = roialignv2_pooler(features, rois)

    roialignrotated_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type="ROIAlignRotated",
    )
    roialignrotated_out = roialignrotated_pooler(features, rois_rotated)

    self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4))
def label_and_sample_anchors(
    self, anchors: List[RotatedBoxes], gt_instances: List[Instances]
) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    """
    Args:
        anchors (list[RotatedBoxes]): anchors for each feature map.
        gt_instances: the ground-truth instances for each image.

    Returns:
        list[Tensor]: List of #img tensors. i-th element is a vector of labels whose
            length is the total number of anchors across feature maps. Label values
            are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class;
            1 = positive class.
        list[Tensor]: i-th element is a Nx5 tensor, where N is the total number of
            anchors across feature maps. The values are the matched gt boxes for
            each anchor. Values are undefined for those anchors not labeled as 1.
    """
    anchors = RotatedBoxes.cat(anchors)

    gt_boxes = [x.gt_boxes for x in gt_instances]
    del gt_instances

    gt_labels = []
    matched_gt_boxes = []
    for gt_boxes_i in gt_boxes:
        """
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors)
        matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(
            match_quality_matrix
        )
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)

        # A vector of labels (-1, 0, 1) for each anchor
        gt_labels_i = self._subsample_labels(gt_labels_i)

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
        else:
            # TODO wasted indexing computation for ignored boxes
            matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

        gt_labels.append(gt_labels_i)  # N,AHW
        matched_gt_boxes.append(matched_gt_boxes_i)
    return gt_labels, matched_gt_boxes
def grasp_fast_rcnn_inference_single_image_rotated(
    scores, boxes, tilts, zs, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return rotated bounding-box detection results by
    thresholding on scores and applying rotated non-maximum suppression (Rotated NMS).

    Args:
        Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores,
        and image shapes per image.

    Returns:
        Same as `fast_rcnn_inference_rotated`, but for only one image.
    """
    valid_mask = (
        torch.isfinite(boxes).all(dim=1)
        & torch.isfinite(scores).all(dim=1)
        & torch.isfinite(tilts).all(dim=1)
        & torch.isfinite(zs).all(dim=1)
    )
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        tilts = tilts[valid_mask]
        zs = zs[valid_mask]

    B = 5  # box dimension
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // B
    # Convert to Boxes to use the `clip` function ...
    boxes = RotatedBoxes(boxes.reshape(-1, B))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B)  # R x C x B

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    tilts = tilts[filter_inds[:, 0]]
    zs = zs[filter_inds[:, 0]]

    # Apply per-class Rotated NMS
    keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    tilts, zs = tilts[keep], zs[keep]

    result = Instances(image_shape)
    result.pred_boxes = RotatedBoxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_zs = torch.flatten(zs)
    result.pred_tilts = torch.flatten(tilts)
    return result, filter_inds[:, 0]
def __call__(self, depth_, inst_):
    depth, inst = copy.deepcopy(depth_), copy.deepcopy(inst_)
    if np.random.uniform(0, 1) < self.prob:
        depth = np.fliplr(depth)
        rbbxs = inst.gt_boxes.tensor.cpu().numpy()
        rbbxs[:, 0] = self.w - rbbxs[:, 0]
        rbbxs[:, 4] = -rbbxs[:, 4]
        rbbxs = rbbxs.reshape(-1, 5)
        gt_boxes = torch.tensor(rbbxs, dtype=torch.float32)
        inst.gt_boxes = RotatedBoxes(gt_boxes)
        inst.gt_tilts = -inst.gt_tilts
    return depth, inst
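# --- Hedged standalone check of the flip rule used above ---
# Horizontally flipping a rotated box in (cx, cy, w, h, angle) format mirrors
# the center x across the image width and negates the angle; width and height
# are unchanged. W = 100 is an illustrative image width.
import numpy as np

W = 100
box = np.array([[30.0, 40.0, 10.0, 5.0, 20.0]])
flipped = box.copy()
flipped[:, 0] = W - flipped[:, 0]
flipped[:, 4] = -flipped[:, 4]
print(flipped)  # center x becomes 70, angle becomes -20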
def _match_and_label_boxes(self, proposals, stage, targets):
    """
    Match proposals with groundtruth using the matcher at the given stage.
    Label the proposals as foreground or background based on the match.

    Args:
        proposals (list[Instances]): One Instances for each image, with
            the field "proposal_boxes".
        stage (int): the current stage
        targets (list[Instances]): the ground truth instances

    Returns:
        list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
    """
    num_fg_samples, num_bg_samples = [], []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        match_quality_matrix = pairwise_iou_rotated(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
        )
        # proposal_labels are 0 or 1
        matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
        if len(targets_per_image) > 0:
            gt_classes = targets_per_image.gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
            gt_classes[proposal_labels == 0] = self.num_classes
            gt_boxes = targets_per_image.gt_boxes[matched_idxs]
        else:
            gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
            # rotated boxes are 5-dimensional (cx, cy, w, h, angle)
            gt_boxes = RotatedBoxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 5))
            )
        proposals_per_image.gt_classes = gt_classes
        proposals_per_image.gt_boxes = gt_boxes

        num_fg_samples.append((proposal_labels == 1).sum().item())
        num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

    # Log the number of fg/bg samples in each stage
    storage = get_event_storage()
    storage.put_scalar(
        "stage{}/roi_head/num_fg_samples".format(stage),
        sum(num_fg_samples) / len(num_fg_samples),
    )
    storage.put_scalar(
        "stage{}/roi_head/num_bg_samples".format(stage),
        sum(num_bg_samples) / len(num_bg_samples),
    )
    return proposals
def _get_ground_truth(self):
    """
    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
            total number of anchors in image i (i.e., len(anchors[i])). Label values are
            in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 5).
    """
    gt_objectness_logits = []
    gt_anchor_deltas = []
    # Concatenate anchors from all feature maps into a single RotatedBoxes per image
    anchors = [RotatedBoxes.cat(anchors_i) for anchors_i in self.anchors]
    for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes, anchors, self.gt_boxes):
        """
        image_size_i: (h, w) for the i-th image
        anchors_i: anchors for i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        # DEBUG
        # assert torch.all(gt_boxes_i.tensor[:, 2] > 1e-5)
        # assert torch.all(gt_boxes_i.tensor[:, 3] > 1e-5)
        # assert torch.all(anchors_i.tensor[:, 2] > 1e-5)
        # assert torch.all(anchors_i.tensor[:, 3] > 1e-5)
        match_quality_matrix = pairwise_iou_rotated(gt_boxes_i, anchors_i)
        matched_idxs, gt_objectness_logits_i = self.anchor_matcher(match_quality_matrix)

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
            gt_objectness_logits_i[~anchors_inside_image] = -1

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
        else:
            # TODO wasted computation for ignored boxes
            matched_gt_boxes = gt_boxes_i[matched_idxs]
            gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                anchors_i.tensor, matched_gt_boxes.tensor
            )

        gt_objectness_logits.append(gt_objectness_logits_i)
        gt_anchor_deltas.append(gt_anchor_deltas_i)
    return gt_objectness_logits, gt_anchor_deltas
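# --- Hedged standalone sketch of the anchor matcher used above ---
# detectron2's Matcher maps an (num_gt x num_anchors) IoU matrix to, per anchor,
# the index of its best gt box and a label in {-1, 0, 1} derived from the
# thresholds. The thresholds and IoU values below are illustrative.
import torch
from detectron2.modeling.matcher import Matcher

matcher = Matcher(thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True)
iou = torch.tensor([[0.9, 0.2, 0.5]])  # 1 gt box vs. 3 anchors
matched_idxs, match_labels = matcher(iou)
print(matched_idxs)   # best-matching gt index per anchor
print(match_labels)   # 1 = positive, 0 = negative, -1 = ignore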
def test_overlay_rotated_instances(self):
    H, W = 100, 150
    img = np.random.rand(H, W, 3) * 255
    num_boxes = 50
    boxes_5d = torch.zeros(num_boxes, 5)
    boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W)
    boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H)
    boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
    boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
    boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
    rotated_boxes = RotatedBoxes(boxes_5d)
    labels = [str(i) for i in range(num_boxes)]

    v = Visualizer(img, self.metadata)
    output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image()
    self.assertEqual(output.shape, img.shape)
def label_anchors(self, anchors, gt_instances):
    """
    Args:
        anchors (list[RotatedBoxes]): A list of #feature level RotatedBoxes.
            Each element contains the anchors of this image on the specific
            feature level.
        gt_instances (list[Instances]): a list of N `Instances`s. The i-th
            `Instances` contains the ground-truth per-instance annotations
            for the i-th input image.

    Returns:
        list[Tensor]: List of #img tensors. i-th element is a vector of labels whose
            length is the total number of anchors across all feature maps
            (sum(Hi * Wi * A)). Label values are in {-1, 0, ..., K}, with -1 meaning
            ignore and K meaning background.
        list[Tensor]: i-th element is a Rx5 tensor, where R is the total number of
            anchors across feature maps. The values are the matched gt boxes for each
            anchor. Values are undefined for those anchors not labeled as foreground.
    """
    anchors = RotatedBoxes.cat(anchors)  # Rx5

    gt_labels = []
    matched_gt_boxes = []
    for gt_per_image in gt_instances:
        match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors)
        matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix)
        del match_quality_matrix

        if len(gt_per_image) > 0:
            matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs]
            gt_labels_i = gt_per_image.gt_classes[matched_idxs]
            # Anchors with label 0 are treated as background.
            gt_labels_i[anchor_labels == 0] = self.num_classes
            # Anchors with label -1 are ignored.
            gt_labels_i[anchor_labels == -1] = -1
        else:
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes

        gt_labels.append(gt_labels_i)
        matched_gt_boxes.append(matched_gt_boxes_i)

    return gt_labels, matched_gt_boxes
def forward(self, features):
    """
    Args:
        features (list[Tensor]): list of backbone feature maps on which to generate anchors.

    Returns:
        list[list[RotatedBoxes]]: a list of #image elements. Each is a list of
            #feature level RotatedBoxes. Each element contains the anchors of this
            image on the specific feature level.
    """
    num_images = len(features[0])
    grid_sizes = [feature_map.shape[-2:] for feature_map in features]
    anchors_over_all_feature_maps = self.grid_anchors(grid_sizes)

    anchors_in_image = []
    for anchors_per_feature_map in anchors_over_all_feature_maps:
        boxes = RotatedBoxes(anchors_per_feature_map)
        anchors_in_image.append(boxes)

    anchors = [copy.deepcopy(anchors_in_image) for _ in range(num_images)]
    return anchors
def get_single_instance(depth, rbbxs):
    inst = Instances(depth.shape)
    rbox = rbbxs[:, [0, 1, 4, 3, 5]]
    gt_boxes = torch.tensor(rbox, dtype=torch.float32)
    inst.gt_boxes = RotatedBoxes(gt_boxes)
    inst.gt_boxes.clip(depth.shape)
    inst.gt_classes = torch.ones(rbbxs.shape[0], dtype=torch.int64)
    gt_tilts = rbbxs[:, 6].astype(np.float32)
    inst.gt_tilts = torch.from_numpy(np.deg2rad(gt_tilts))
    gt_z = rbbxs[:, 2].astype(np.float32) * 10
    inst.gt_z = torch.from_numpy(gt_z)
    gt_metric = rbbxs[:, 8].astype(np.float32)
    inst.gt_metric = torch.from_numpy(gt_metric)
    return inst
def create_instances(predictions, image_size):
    ret = Instances(image_size)

    score = np.asarray([x["score"] for x in predictions])
    chosen = (score > args.conf_threshold).nonzero()[0]
    score = score[chosen]
    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 5)
    bbox = BoxMode.convert(bbox, BoxMode.XYWHA_ABS, BoxMode.XYWHA_ABS)

    labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])

    ret.scores = score
    ret.pred_boxes = RotatedBoxes(bbox)
    ret.pred_classes = labels

    try:
        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
    except KeyError:
        pass
    return ret
def convert_outputs(self, batched_inputs, inputs, results):
    output_names = self.get_output_names()
    assert len(results) == len(output_names)

    m_results = {}
    for k, v in results.items():
        assert k in output_names, k
        m_results[k] = v.to(self._ns.device)

    # TensorRT output number is not dynamic
    image_sizes = inputs["image_sizes"]
    m_instances = [Instances(image_size) for image_size in image_sizes]

    # pred_boxes format: (batch_index, x0, y0, x1, y1)
    pred_boxes = m_results["pred_boxes"][:, 1:]
    scores = m_results["scores"]
    pred_classes = m_results["pred_classes"].to(torch.int64)
    batch_splits = m_results["batch_splits"].to(torch.int64).cpu()
    pred_masks = m_results.get("pred_masks", None)

    if pred_boxes.shape[1] == 5:
        pred_boxes = RotatedBoxes(pred_boxes)
    else:
        pred_boxes = Boxes(pred_boxes)

    offset = 0
    for i in range(len(batched_inputs)):
        next_offset = offset + batch_splits[i]
        m_instances[i].pred_boxes = pred_boxes[offset:next_offset]
        m_instances[i].scores = scores[offset:next_offset]
        m_instances[i].pred_classes = pred_classes[offset:next_offset]
        if "pred_masks" in m_results:
            num_masks = batch_splits[i]
            indices = torch.arange(num_masks, device=pred_classes.device)
            m_instances[i].pred_masks = pred_masks[offset:next_offset][
                indices, m_instances[i].pred_classes
            ][:, None]
        offset += int(len(pred_boxes) / len(batched_inputs))

    return meta_arch.GeneralizedRCNN._postprocess(m_instances, batched_inputs, image_sizes)
def rotated_annotations_to_instances(annos, image_size):
    boxes = [obj["bbox"] for obj in annos]
    boxes = torch.tensor(boxes, dtype=torch.float)
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes
    # del boxes, classes

    # include component list into target
    # if len(annos) and "component" in annos[0]:
    #     component = []
    #     for obj in annos:
    #         # torch.stack
    #         component.append(obj["component"])
    #
    #     component = np.array(component)
    #
    #     # component = torch.tensor(component, dtype=torch.int8)
    #     target.gt_component = np.array(component)
    return target