def compute_iou_dt_gt(self, dt, gt, is_crowd):
    """
    Compute the IoU between detections and ground truths.

    When either input is reported as rotated by `self.is_rotated`, both are
    converted to 5-dim `RotatedBoxes` and compared with
    `pairwise_iou_rotated`. Otherwise the call is forwarded unchanged to
    `maskUtils.iou`.

    Args:
        dt: detections, in whatever format `self.is_rotated` and
            `self.boxlist_to_tensor` accept.
        gt: ground truths, same format as `dt`.
        is_crowd: iterable of per-ground-truth crowd flags. On the rotated
            path every flag must equal 0.

    Returns:
        The result of `pairwise_iou_rotated` (rotated path) or of
        `maskUtils.iou` (axis-aligned path).

    Raises:
        AssertionError: on the rotated path, if any crowd flag is non-zero.
    """
    involves_rotated_boxes = self.is_rotated(dt) or self.is_rotated(gt)
    if not involves_rotated_boxes:
        # This is the same as the classical COCO evaluation
        return maskUtils.iou(dt, gt, is_crowd)

    # TODO: take is_crowd into consideration
    assert all(crowd_flag == 0 for crowd_flag in is_crowd)
    rotated_dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
    rotated_gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
    return pairwise_iou_rotated(rotated_dt, rotated_gt)
def _get_ground_truth(self):
    """
    Build per-image objectness labels and regression targets for the anchors.

    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose
            length is the total number of anchors in image i
            (i.e., len(anchors[i])). Label values are in {-1, 0, 1}, with
            meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i has shape
            (len(anchors[i]), 5).
    """
    # Merge the anchors of all feature maps into one RotatedBoxes per image.
    anchors_per_image = [
        RotatedBoxes.cat(per_level_anchors) for per_level_anchors in self.anchors
    ]

    objectness_per_image = []
    deltas_per_image = []
    per_image_inputs = zip(self.image_sizes, anchors_per_image, self.gt_boxes)
    for img_size, img_anchors, img_gt_boxes in per_image_inputs:
        # img_size:     (h, w) for the current image
        # img_anchors:  anchors for the current image
        # img_gt_boxes: ground-truth boxes for the current image
        iou_matrix = pairwise_iou_rotated(img_gt_boxes, img_anchors)
        matched_idxs, objectness = self.anchor_matcher(iou_matrix)

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in cvpods
            inside_mask = img_anchors.inside_box(img_size, self.boundary_threshold)
            objectness[~inside_mask] = -1

        if len(img_gt_boxes) > 0:
            # TODO wasted computation for ignored boxes
            assigned_gt = img_gt_boxes[matched_idxs]
            deltas = self.box2box_transform.get_deltas(
                img_anchors.tensor, assigned_gt.tensor
            )
        else:
            # These values won't be used anyway since the anchor is labeled as background
            deltas = torch.zeros_like(img_anchors.tensor)

        objectness_per_image.append(objectness)
        deltas_per_image.append(deltas)

    return objectness_per_image, deltas_per_image
def label_and_sample_proposals(self, proposals, targets):
    """
    Prepare some proposals to be used to train the RROI heads.

    Proposals are matched against `targets` by rotated IoU, labelled, and
    then subsampled by `self._sample_proposals`. When
    `self.proposal_append_gt` is set, the ground-truth boxes are first added
    to the proposals. The mean number of foreground and background samples
    per image is logged to the event storage.

    Args:
        See :meth:`StandardROIHeads.forward`

    Returns:
        list[Instances]: length `N` list of `Instances`s containing the
        proposals sampled for training. Each `Instances` has the following
        fields:

        - proposal_boxes: the rotated proposal boxes
        - gt_boxes: the ground-truth rotated boxes that the proposal is
          assigned to. This is only meaningful for foreground proposals;
          for proposals counted as background
          (`gt_classes == self.num_classes`) the assigned box carries no
          meaning, and for an image without any ground truth all boxes are
          zeros.
        - gt_classes: the ground-truth classification label for each
          proposal
    """
    all_gt_boxes = [target.gt_boxes for target in targets]
    if self.proposal_append_gt:
        proposals = add_ground_truth_to_proposals(all_gt_boxes, proposals)

    sampled_proposals = []
    fg_counts = []
    bg_counts = []
    for image_proposals, image_targets in zip(proposals, targets):
        image_has_gt = len(image_targets) > 0
        iou_matrix = pairwise_iou_rotated(
            image_targets.gt_boxes, image_proposals.proposal_boxes
        )
        matched_idxs, matched_labels = self.proposal_matcher(iou_matrix)
        sampled_idxs, gt_classes = self._sample_proposals(
            matched_idxs, matched_labels, image_targets.gt_classes
        )

        image_proposals = image_proposals[sampled_idxs]
        image_proposals.gt_classes = gt_classes

        if not image_has_gt:
            # No ground truth in this image: attach all-zero placeholder boxes.
            zero_boxes = image_targets.gt_boxes.tensor.new_zeros(
                (len(sampled_idxs), 5)
            )
            image_proposals.gt_boxes = RotatedBoxes(zero_boxes)
        else:
            matched_gt_idxs = matched_idxs[sampled_idxs]
            image_proposals.gt_boxes = image_targets.gt_boxes[matched_gt_idxs]

        # Samples labelled `self.num_classes` are counted as background.
        bg_count = (gt_classes == self.num_classes).sum().item()
        bg_counts.append(bg_count)
        fg_counts.append(gt_classes.numel() - bg_count)
        sampled_proposals.append(image_proposals)

    # Log the number of fg/bg samples that are selected for training ROI heads
    storage = get_event_storage()
    storage.put_scalar("roi_head/num_fg_samples", np.mean(fg_counts))
    storage.put_scalar("roi_head/num_bg_samples", np.mean(bg_counts))

    return sampled_proposals