def func(x):
    boxes = Boxes(x)
    return boxes.area()
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        bo_segms = [obj["bg_object_segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
            bo_masks = PolygonMasks(bo_segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks! "
                        "Supported types are: polygons as list[list[float] or ndarray], "
                        "COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
            # The original code only built `bo_masks` in the polygon branch, which
            # leaves it undefined here; keep the background-object masks as polygons
            # so the assignment below is always valid.
            bo_masks = PolygonMasks(bo_segms)
        target.gt_masks = masks
        target.gt_bo_masks = bo_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
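# --- Usage sketch (not part of the original code) ---
# A minimal, hypothetical call to `annotations_to_instances` above with two
# box-only annotations; the field names follow detectron2's standard
# dataset-dict format, and all values are invented for illustration.
import torch
from detectron2.structures import BoxMode

annos_example = [
    {"bbox": [10.0, 20.0, 50.0, 80.0], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0},
    {"bbox": [100.0, 120.0, 40.0, 60.0], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 2},
]
target_example = annotations_to_instances(annos_example, image_size=(480, 640))
print(target_example.gt_boxes.tensor)  # boxes converted to XYXY_ABS and clipped
print(target_example.gt_classes)       # tensor([0, 2])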
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper`. Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * "image": Tensor, image in (C, H, W) format. * "instances": Instances * "sem_seg": semantic segmentation ground truth. * Other information that's included in the original dicts, such as: "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: list[dict]: each dict is the results for one image. The dict contains the following keys: * "instances": see :meth:`GeneralizedRCNN.forward` for its format. * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`. See the return value of :func:`combine_semantic_and_instance_outputs` for its format. """ images = [x["image"].to(self.device) for x in batched_inputs] images = [(x - self.pixel_mean) / self.pixel_std for x in images] images = ImageList.from_tensors(images, SIZE_DIVISIBILITY) score_sem, score_inst, score_conf = self.seg_model(images.tensor) h, w = images.tensor.size(2), images.tensor.size(3) score_inst = F.upsample(input=score_inst, size=(h, w), mode='bilinear') score_sem = F.upsample(input=score_sem, size=(h, w), mode='bilinear') score_conf_softmax = self.softmax_layer(score_conf) score_inst_sig = self.sigmoid_layer(score_inst) score_inst_sig_stuff = score_inst_sig[:, :BACKGROUND_NUM] score_inst_sig_thing = score_inst_sig[:, BACKGROUND_NUM:] if "sem_seg" in batched_inputs[0]: gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] gt_sem_seg = ImageList.from_tensors(gt_sem_seg, SIZE_DIVISIBILITY, IGNORE_LABEL_SEM).tensor else: gt_sem_seg = None if "instances" in batched_inputs[0]: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] else: gt_instances = None #pdb.set_trace() if self.training: assert (gt_sem_seg - 1 < 0).sum() == 0 sem_seg_losses = self.criterion_sem(score_sem, gt_sem_seg - 1) gt_sem_seg[gt_sem_seg > BACKGROUND_NUM] = 0 gt_stuff = F.one_hot(gt_sem_seg, num_classes=BACKGROUND_NUM + 1).permute( 0, 3, 1, 2) gt_stuff = gt_stuff[:, 1:] num_inst = sum( [len(gt_instances[i]) for i in range(len(gt_instances))]) num_inst = torch.as_tensor([num_inst], dtype=torch.float, device=self.device) if is_dist_avail_and_initialized(): torch.distributed.all_reduce(num_inst) num_inst = torch.clamp(num_inst / get_world_size(), min=1).item() loss_stuff_dice = 0. loss_thing_dice = 0. loss_stuff_focal = 0. loss_conf = 0. 
for i in range(len(batched_inputs)): gt_inst = gt_instances[i] gt_classes = gt_inst.gt_classes if gt_inst.has('gt_masks'): gt_masks = gt_inst.gt_masks masks = torch.stack([ torch.from_numpy( polygons_to_bitmask(poly, gt_inst.image_size[0], gt_inst.image_size[1])).to( self.device) for poly in gt_masks.polygons ], 0) masks_pad = masks.new_full( (masks.shape[0], images.tensor.shape[-2], images.tensor.shape[-1]), False) masks_pad[:, :masks.shape[-2], :masks.shape[-1]].copy_( masks) else: masks_pad = torch.zeros( [0, images.tensor.shape[-2], images.tensor.shape[-1]], dtype=torch.bool, device=self.device) row_ind, col_ind = MatchDice(score_inst_sig_thing[i:i + 1], torch.unsqueeze(masks_pad, 0), score_conf_softmax[i:i + 1], gt_classes) col_ind_empty = np.setdiff1d( np.arange(score_inst_sig_thing[i:i + 1].shape[1]), col_ind) score_inst_sig_perm = torch.cat( (score_inst_sig_stuff[i], score_inst_sig_thing[i, col_ind, :, :]), 0) target_inst_perm = torch.cat( (gt_stuff[i].float(), masks_pad[row_ind].float()), 0) loss_stuff_dice_tmp, loss_thing_dice_tmp = dice_loss( score_inst_sig_perm, target_inst_perm, num_inst, background_channels=BACKGROUND_NUM, valid_mask=None, sigmoid_clip=True) loss_stuff_dice += loss_stuff_dice_tmp loss_thing_dice += loss_thing_dice_tmp target_conf = gt_classes.new_full((score_conf.shape[1], ), FOREGROUND_NUM) target_conf[:len(gt_classes[row_ind])] = gt_classes[row_ind] loss_conf_tmp = conf_loss(torch.cat( (score_conf[i, col_ind], score_conf[i, col_ind_empty]), 0), target_conf.long(), neg_factor=10, neg_idx=FOREGROUND_NUM) loss_conf += loss_conf_tmp loss_stuff_focal_tmp = focal_loss(score_inst_sig_stuff[i], gt_stuff[i].float(), valid_mask=None, sigmoid_clip=True) loss_stuff_focal += loss_stuff_focal_tmp loss_stuff_focal = loss_stuff_focal / len(batched_inputs) loss_stuff_dice = loss_stuff_dice / len(batched_inputs) loss_conf = loss_conf / len(batched_inputs) loss_stuff_focal = loss_stuff_focal * 100. loss_conf = loss_conf * 5 losses = {} losses.update({"loss_sem_seg": sem_seg_losses}) losses.update({"loss_stuff_focal": loss_stuff_focal}) losses.update({"loss_stuff_dice": loss_stuff_dice}) losses.update({"loss_thing_dice": loss_thing_dice}) losses.update({"loss_conf": loss_conf}) return losses score_sem_null = score_sem.new_full( (score_sem.shape[0], 1, score_sem.shape[-2], score_sem.shape[-1]), -1000.) 
processed_results = [] for i in range(len(batched_inputs)): height = batched_inputs[i].get("height", images.image_sizes[i][0]) width = batched_inputs[i].get("width", images.image_sizes[i][1]) score_inst_sig_stuff_b = F.interpolate(score_inst_sig_stuff[ i:i + 1, :, :images.image_sizes[i][0], :images.image_sizes[i][1]], size=(height, width), mode="bilinear", align_corners=False) score_inst_sig_thing_b = F.interpolate(score_inst_sig_thing[ i:i + 1, :, :images.image_sizes[i][0], :images.image_sizes[i][1]], size=(height, width), mode="bilinear", align_corners=False) img_name = os.path.basename(batched_inputs[i]['file_name']) img_name_split = img_name.split('.') save_dir = '/home/yz9244/detectron2/output/vis_inst_sig' for j in range(80): pred_inst_tmp = np.asarray( 255 * (score_inst_sig_thing_b[0, j].cpu().numpy()), dtype=np.uint8) img = Image.fromarray(pred_inst_tmp) save_img = Image.new('RGB', (img.width, 2 * img.height)) img = Image.fromarray(pred_inst_tmp) save_img.paste(img, (0, 0)) pred_inst_tmp = np.asarray(255 * (pred_inst_tmp > 127), dtype=np.uint8) img = Image.fromarray(pred_inst_tmp) save_img.paste(img, (0, img.height)) save_img.save( os.path.join(save_dir, img_name_split[0] + '_%02d.png' % (j))) res = {} score_sem_foreground = torch.log( torch.exp(score_sem[i:i + 1, BACKGROUND_NUM:]).sum(dim=1, keepdim=True)) sem_seg_result = torch.cat( (score_sem_foreground, score_sem[i:i + 1, :BACKGROUND_NUM]), 1) sem_seg_r = sem_seg_postprocess(sem_seg_result[0], images.image_sizes[i], height, width) res.update({"sem_seg": sem_seg_r}) result = Instances((height, width)) inst_sem_id = torch.argmax(score_conf_softmax[i], dim=1) scores = score_conf_softmax[i, range(score_conf.shape[1]), inst_sem_id] scores = scores[inst_sem_id != FOREGROUND_NUM] pred_classes = inst_sem_id[inst_sem_id != FOREGROUND_NUM] pred_masks = score_inst_sig_thing_b[0, inst_sem_id != FOREGROUND_NUM] pred_mask_sum = torch.sum(pred_masks > 0.5, (1, 2)) result.pred_masks = pred_masks[pred_mask_sum > 0] > 0.5 result.pred_classes = pred_classes[pred_mask_sum > 0] result.scores = scores[pred_mask_sum > 0] box_tmp = torch.zeros(result.pred_masks.shape[0], 4) for j in range(result.pred_masks.shape[0]): nonzero_idx = torch.nonzero(result.pred_masks[j]) box_tmp[j, 0] = nonzero_idx[:, 1].min().item() box_tmp[j, 2] = nonzero_idx[:, 1].max().item() box_tmp[j, 1] = nonzero_idx[:, 0].min().item() box_tmp[j, 3] = nonzero_idx[:, 0].max().item() result.pred_boxes = Boxes(box_tmp) #detector_r = detector_postprocess(result, height, width) detector_r = result res.update({"instances": detector_r}) panoptic_r = combine_semantic_and_instance_outputs( result.scores, result.pred_classes, pred_masks[pred_mask_sum > 0], score_inst_sig_stuff_b[0]) res.update({"panoptic_seg": panoptic_r}) processed_results.append(res) return processed_results
def forward(self, scores, proposal_boxes):
    instances = Instances((10, 10))
    instances.proposal_boxes = Boxes(proposal_boxes)
    return self._output_layer.predict_probs((scores, None), [instances])
def _create_instances_fulldp(self): image_shape = (680, 840) instances = Instances(image_shape) instances.gt_boxes = Boxes( torch.as_tensor([ [65.0, 55.0, 165.0, 155.0], [170.0, 175.0, 275.0, 280.0], [55.0, 165.0, 165.0, 275.0], ])) instances.proposal_boxes = Boxes( torch.as_tensor([ [66.0, 54.0, 166.0, 154.0], [171.0, 174.0, 276.0, 279.0], [56.0, 164.0, 166.0, 274.0], ])) instances.gt_densepose = DensePoseList( [ self._create_dp_data( { "dp_x": [149.99, 198.62, 157.59], "dp_y": [170.74, 197.73, 123.12], "dp_vertex": [3, 4, 5], "ref_model": "cat_5001", "dp_masks": [], }, { "c": (100, 100), "r": 50 }, ), self._create_dp_data( { "dp_x": [234.53, 116.72, 71.66], "dp_y": [107.53, 11.31, 142.32], "dp_vertex": [6, 7, 8], "ref_model": "dog_5002", "dp_masks": [], }, { "c": (200, 150), "r": 40 }, ), self._create_dp_data( { "dp_x": [225.54, 202.61, 135.90], "dp_y": [167.46, 181.00, 211.47], "dp_vertex": [9, 10, 11], "ref_model": "elephant_5002", "dp_masks": [], }, { "c": (100, 200), "r": 45 }, ), ], instances.gt_boxes, image_shape, ) return instances
def inference_single_image(self, anchors, box_cls, box_delta, image_size):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Arguments:
        anchors (list[Boxes]): list of #feature levels. Each entry contains
            a Boxes object, which contains all the anchors in that feature level.
        box_cls (list[Tensor]): list of #feature levels. Each entry contains
            tensor of size (H x W x A, K)
        box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
        image_size (tuple(H, W)): a tuple of the image height and width.

    Returns:
        Same as `inference`, but for only one image.
    """
    boxes_all = []
    scores_all = []
    class_idxs_all = []

    # Iterate over every feature level
    for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors):
        # (HxWxAxK,)
        box_cls_i = box_cls_i.flatten().sigmoid_()

        # Keep top k top scoring indices only.
        num_topk = min(self.topk_candidates, box_reg_i.size(0))
        # torch.sort is actually faster than .topk (at least on GPUs)
        predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
        predicted_prob = predicted_prob[:num_topk]
        topk_idxs = topk_idxs[:num_topk]

        # filter out the proposals with low confidence score
        keep_idxs = predicted_prob > self.score_threshold
        predicted_prob = predicted_prob[keep_idxs]
        topk_idxs = topk_idxs[keep_idxs]

        anchor_idxs = topk_idxs // self.num_classes
        classes_idxs = topk_idxs % self.num_classes

        box_reg_i = box_reg_i[anchor_idxs]
        anchors_i = anchors_i[anchor_idxs]
        # predict boxes
        predicted_boxes = self.box2box_transform.apply_deltas(
            box_reg_i, anchors_i.tensor)

        boxes_all.append(predicted_boxes)
        scores_all.append(predicted_prob)
        class_idxs_all.append(classes_idxs)

    boxes_all, scores_all, class_idxs_all = [
        cat(x) for x in [boxes_all, scores_all, class_idxs_all]
    ]
    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
    keep = keep[:self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
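# --- Illustration (not part of the original code) ---
# Toy walk-through of the per-level decoding above, with made-up sizes and
# scores: flatten the (HxWxA, K) class scores, keep the top scores above a
# threshold, then recover the anchor index and class index of each kept score.
import torch

num_anchors, num_classes = 6, 3
box_cls_i = torch.rand(num_anchors * num_classes)   # pretend these are sigmoid scores
num_topk = min(4, box_cls_i.size(0))                # stands in for self.topk_candidates
predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
predicted_prob, topk_idxs = predicted_prob[:num_topk], topk_idxs[:num_topk]
keep = predicted_prob > 0.05                        # stands in for self.score_threshold
topk_idxs = topk_idxs[keep]
anchor_idxs = topk_idxs // num_classes              # which anchor each kept score came from
class_idxs = topk_idxs % num_classes                # which class each kept score came from
print(anchor_idxs, class_idxs)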
def label_and_sample_proposals( self, proposals: List[Instances], targets: List[Instances]) -> List[Instances]: """ Prepare some proposals to be used to train the ROI heads. It performs box matching between `proposals` and `targets`, and assigns training labels to the proposals. It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth boxes, with a fraction of positives that is no larger than ``self.positive_sample_fraction``. Args: See :meth:`ROIHeads.forward` Returns: list[Instances]: length `N` list of `Instances`s containing the proposals sampled for training. Each `Instances` has the following fields: - proposal_boxes: the proposal boxes - gt_boxes: the ground-truth box that the proposal is assigned to (this is only meaningful if the proposal has a label > 0; if label = 0 then the ground-truth box is random) Other fields such as "gt_classes", "gt_masks", that's included in `targets`. """ gt_boxes = [x.gt_boxes for x in targets] # Augment proposals with ground-truth boxes. # In the case of learned proposals (e.g., RPN), when training starts # the proposals will be low quality due to random initialization. # It's possible that none of these initial # proposals have high enough overlap with the gt objects to be used # as positive examples for the second stage components (box head, # cls head, mask head). Adding the gt boxes to the set of proposals # ensures that the second stage components will have some positive # examples from the start of training. For RPN, this augmentation improves # convergence and empirically improves box AP on COCO by about 0.5 # points (under one tested configuration). if self.proposal_append_gt: proposals = add_ground_truth_to_proposals(gt_boxes, proposals) proposals_with_gt = [] num_fg_samples = [] num_bg_samples = [] for proposals_per_image, targets_per_image in zip(proposals, targets): has_gt = len(targets_per_image) > 0 match_quality_matrix = pairwise_iou( targets_per_image.gt_boxes, proposals_per_image.proposal_boxes) matched_idxs, matched_labels = self.proposal_matcher( match_quality_matrix) sampled_idxs, gt_classes = self._sample_proposals( matched_idxs, matched_labels, targets_per_image.gt_classes) # Set target attributes of the sampled proposals: proposals_per_image = proposals_per_image[sampled_idxs] proposals_per_image.gt_classes = gt_classes # We index all the attributes of targets that start with "gt_" # and have not been added to proposals yet (="gt_classes"). if has_gt: sampled_targets = matched_idxs[sampled_idxs] # NOTE: here the indexing waste some compute, because heads # like masks, keypoints, etc, will filter the proposals again, # (by foreground/background, or number of keypoints in the image, etc) # so we essentially index the data twice. 
for (trg_name, trg_value) in targets_per_image.get_fields().items(): if trg_name.startswith( "gt_") and not proposals_per_image.has(trg_name): proposals_per_image.set(trg_name, trg_value[sampled_targets]) else: gt_boxes = Boxes( targets_per_image.gt_boxes.tensor.new_zeros( (len(sampled_idxs), 4))) proposals_per_image.gt_boxes = gt_boxes num_bg_samples.append( (gt_classes == self.num_classes).sum().item()) num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) proposals_with_gt.append(proposals_per_image) # Log the number of fg/bg samples that are selected for training ROI heads storage = get_event_storage() storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) return proposals_with_gt
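# --- Illustration (not part of the original code) ---
# Toy version of the proposal-to-GT matching step above. The thresholds and
# boxes are invented and do not reflect this head's actual configuration.
import torch
from detectron2.structures import Boxes, pairwise_iou
from detectron2.modeling.matcher import Matcher

gt = Boxes(torch.tensor([[0., 0., 10., 10.], [20., 20., 40., 40.]]))
props = Boxes(torch.tensor([[1., 1., 9., 9.], [21., 21., 39., 39.], [50., 50., 60., 60.]]))
iou = pairwise_iou(gt, props)                  # (num_gt, num_proposals)
matcher = Matcher(thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=False)
matched_idxs, matched_labels = matcher(iou)    # per-proposal GT index and {1, 0, -1} label
print(matched_idxs, matched_labels)            # ignored (-1), foreground (1), background (0)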
def forward(self, features, pred_instances=None, targets=None): for i, f in enumerate(self.in_features): if i == 0: x = self.scale_heads[i](features[f]) else: x = x + self.scale_heads[i](features[f]) pred_logits = self.predictor(x) pred_edge = pred_logits.sigmoid() att_map = self.attender(1 - pred_edge) # regions that need evolution if self.training: edge_target = targets[0] snake_input = x pred_edge_full = F.interpolate( pred_edge, scale_factor=self.common_stride, mode="bilinear", align_corners=False, ) snake_input = torch.cat([att_map, x], dim=1) # Quick fix for batches that do not have poly after filtering try: _, poly_loss = self.refine_head(snake_input, None, targets[1]) except Exception: poly_loss = {} edge_loss = self.loss(pred_edge_full, edge_target) * self.loss_weight poly_loss.update({ "loss_edge_det": edge_loss, }) return [], poly_loss, [] else: snake_input = torch.cat([att_map, x], dim=1) if "instance" in self.gt_input: assert targets[1][0] is not None for im_i in range(len(targets[1][0])): gt_instances_per_im = targets[1][0][im_i] bboxes = gt_instances_per_im.gt_boxes.tensor instances_per_im = Instances( pred_instances[im_i]._image_size) instances_per_im.pred_boxes = Boxes(bboxes) instances_per_im.pred_classes = gt_instances_per_im.gt_classes instances_per_im.scores = torch.ones_like( gt_instances_per_im.gt_classes, device=bboxes.device) if gt_instances_per_im.has("gt_masks"): gt_masks = gt_instances_per_im.gt_masks ext_pts_off = self.refine_head.get_simple_extreme_points( gt_masks.polygons).to(bboxes.device) ex_t = torch.stack( [ext_pts_off[:, None, 0], bboxes[:, None, 1]], dim=2) ex_l = torch.stack( [bboxes[:, None, 0], ext_pts_off[:, None, 1]], dim=2) ex_b = torch.stack( [ext_pts_off[:, None, 2], bboxes[:, None, 3]], dim=2) ex_r = torch.stack( [bboxes[:, None, 2], ext_pts_off[:, None, 3]], dim=2) instances_per_im.ext_points = ExtremePoints( torch.cat([ex_t, ex_l, ex_b, ex_r], dim=1)) pred_instances[im_i] = instances_per_im new_instances, _ = self.refine_head(snake_input, pred_instances, None) pred_edge = att_map return pred_edge, {}, new_instances
def may_visualize_gt(self, batched_inputs, init_objectness, init_bbox, refine_objectness, refine_boxes, centers, pred_init_boxes, pred_refine_boxes, logits): """ Visualize initial and refine boxes using mathced labels for filtering. The prediction at positive positions are shown. """ if self.training: if self.vis_period <= 0: return storage = get_event_storage() if not storage.iter % self.vis_period == 0: return from detectron2.utils.visualizer import Visualizer image_index = 0 img = batched_inputs[image_index]["image"].cpu().numpy() assert img.shape[0] == 3, "Images should have 3 channels." img = img[::-1, :, :] img = img.transpose(1, 2, 0) v_init = Visualizer(img, None) v_init = v_init.overlay_instances(boxes=Boxes(init_bbox[image_index][ init_objectness[image_index]].cpu())) init_image = v_init.get_image() v_refine = Visualizer(img, None) v_refine = v_refine.overlay_instances( boxes=Boxes(refine_boxes[image_index][ refine_objectness[image_index] > 0].cpu())) refine_image = v_refine.get_image() if self.training: vis_img = np.vstack((init_image, refine_image)) vis_img = vis_img.transpose(2, 0, 1) storage.put_image("TOP: init gt boxes; Bottom: refine gt boxes", vis_img) vp_init = Visualizer(img, None) selected_centers = centers[init_objectness[image_index]].cpu().numpy() vp_init = vp_init.overlay_instances( boxes=Boxes(pred_init_boxes[image_index][ init_objectness[image_index]].detach().cpu()), labels=logits[image_index] [init_objectness[image_index]].sigmoid().max(1)[0].detach().cpu()) init_image = vp_init.get_image() for point in selected_centers: init_image = cv2.circle(init_image, tuple(point), 3, (255, 255, 255)) vp_refine = Visualizer(img, None) foreground_idxs = (refine_objectness[image_index] >= 0).logical_and( refine_objectness[image_index] < self.num_classes) selected_centers = centers[foreground_idxs].cpu().numpy() vp_refine = vp_refine.overlay_instances( boxes=pred_refine_boxes[image_index] [foreground_idxs].detach().cpu(), labels=logits[image_index][foreground_idxs].sigmoid().max( 1)[0].detach().cpu()) refine_image = vp_refine.get_image() for point in selected_centers: refine_image = cv2.circle(refine_image, tuple(point), 3, (255, 255, 255)) vis_img = np.vstack((init_image, refine_image)) if self.training: vis_img = vis_img.transpose(2, 0, 1) storage.put_image( "TOP: init pred boxes; Bottom: refine pred boxes", vis_img) # NOTE: This is commented temporarily. Uncomment it if # eagerly visualization is desired. '''
def extract_features(args, detector, raw_images, given_boxes=None): with torch.no_grad(): inputs = [] for raw_image in raw_images: image = detector.transform_gen.get_transform(raw_image).apply_image(raw_image) image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) inputs.append({"image": image, "height": raw_image.shape[0], "width": raw_image.shape[1]}) images = detector.model.preprocess_image(inputs) # Run Backbone Res1-Res4 features = detector.model.backbone(images.tensor) # Feature extraction given the bounding boxes if given_boxes: # Process Boxes in batch mode proposal_boxes = [] original_boxes = [] box_ids = [] for i, boxes_data in enumerate(given_boxes): boxes = [] curr_box_ids = [] for bid, bbox in boxes_data: boxes.append(bbox) curr_box_ids.append(bid) raw_boxes = Boxes(torch.tensor(boxes, device=images.tensor.device)) raw_image = raw_images[i] # Remember that raw_image has shape [height, width, color_channel] raw_height, raw_width = raw_image.shape[:2] # Remember that images[i] has shape [color_channel, height, width] new_height, new_width = images[i].shape[1:] # Scale the box scale_x = 1. * new_width / raw_width scale_y = 1. * new_height / raw_height boxes = raw_boxes.clone() boxes.scale(scale_x=scale_x, scale_y=scale_y) proposal_boxes.append(boxes) original_boxes.append(raw_boxes) box_ids.append(curr_box_ids) features = [features[f] for f in detector.model.roi_heads.in_features] box_features = detector.model.roi_heads._shared_roi_transform( features, proposal_boxes ) feature_pooled = box_features.mean(dim=[2, 3]) # pooled to 1x1 # Predict classes and boxes for each proposal. pred_class_logits, pred_proposal_deltas = detector.model.roi_heads.box_predictor(feature_pooled) pred_class_prob = torch.softmax(pred_class_logits, -1) # we reset the background class that we will ignore later on pred_class_prob[:, -1] = 0.0 roi_features = feature_pooled outputs = [] total_boxes = 0 # roi_features.shape = (num_total_boxes, 2048) # we need to group the boxes by image id for batch_idx, raw_image in enumerate(raw_images): indexes = slice(total_boxes, total_boxes + len(given_boxes[batch_idx])) instances = Instances( image_size=raw_image.shape[:2], pred_boxes=original_boxes[batch_idx], scores=pred_class_prob[indexes], features=roi_features[indexes], box_ids=box_ids[batch_idx] ) outputs.append(instances) total_boxes += len(given_boxes[batch_idx]) return outputs # Feature extraction without bounding boxes # Generate proposals with RPN proposals, _ = detector.model.proposal_generator(images, features, None) # Run RoI head for each proposal (RoI Pooling + Res5) proposal_boxes = [x.proposal_boxes for x in proposals] features = [features[f] for f in detector.model.roi_heads.in_features] box_features = detector.model.roi_heads._shared_roi_transform( features, proposal_boxes ) feature_pooled = box_features.mean(dim=[2, 3]) # (sum_proposals, 2048), pooled to 1x1 # Predict classes and boxes for each proposal. 
pred_class_logits, pred_proposal_deltas = detector.model.roi_heads.box_predictor(feature_pooled) rcnn_outputs = FastRCNNOutputs( detector.model.roi_heads.box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, detector.model.roi_heads.smooth_l1_beta, ) # Fixed-number NMS instances_list, ids_list = [], [] probs_list = rcnn_outputs.predict_probs() boxes_list = rcnn_outputs.predict_boxes() for probs, boxes, image_size in zip(probs_list, boxes_list, images.image_sizes): for nms_thresh in np.arange(0.5, 1.0, 0.1): instances, ids = fast_rcnn_inference_single_image( boxes, probs, image_size, nms_thresh=nms_thresh, topk_per_image=args.max_boxes ) if len(ids) >= args.min_boxes: break instances_list.append(instances) ids_list.append(ids) # Post processing for features features_list = feature_pooled.split( rcnn_outputs.num_preds_per_image) # (sum_proposals, 2048) --> [(p1, 2048), (p2, 2048), ..., (pn, 2048)] roi_features_list = [] for ids, features in zip(ids_list, features_list): roi_features_list.append(features[ids].detach()) # Post processing for bounding boxes (rescale to raw_image) raw_instances_list = [] for batch_idx, (instances, input_per_image, image_size) in enumerate(zip( instances_list, inputs, images.image_sizes )): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) raw_instances, nonempty = detector_postprocess(instances, height, width) raw_instances.features = roi_features_list[batch_idx][nonempty] raw_instances_list.append(raw_instances) return raw_instances_list
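# --- Usage sketch (not part of the original code) ---
# Hypothetical call showing the expected `given_boxes` layout: one list per
# image, each entry a (box_id, [x1, y1, x2, y2]) pair in raw-image pixel
# coordinates. `args`, `detector`, and "example.jpg" are assumed to come from
# the surrounding script.
import cv2

raw_images = [cv2.imread("example.jpg")]       # HWC uint8 array
given_boxes = [[(0, [35.0, 40.0, 200.0, 180.0]),
                (1, [210.0, 60.0, 330.0, 240.0])]]
instances_per_image = extract_features(args, detector, raw_images, given_boxes)
print(instances_per_image[0].features.shape)   # (num_boxes, 2048) pooled RoI features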
def process(self, input, output): previous_len = len(self._partial_results) for instance, output in zip(input, output): input_image_id = instance['image_id'] instance_gt_annots = self._coco_api.loadAnns( self._coco_api.getAnnIds(imgIds=input_image_id)) im_name = os.path.basename(instance['file_name']) fields = output["instances"].get_fields() pred_boxes = fields['pred_boxes'] # xyxy scores = fields['scores'].cpu().numpy() pred_class = fields['pred_classes'] if instance_gt_annots: # GT but not preds --> FN if len(pred_boxes) == 0: for annot_dict in instance_gt_annots: row = [im_name, "FN", "FN", "non-eval", -1, "NA"] self._partial_results += [row] # GT and preds --> TP or FP else: det_out = "TP" from detectron2.structures import Boxes, pairwise_iou, BoxMode gt_boxes = torch.tensor([ annot_dict['bbox'] for annot_dict in instance_gt_annots ]) gt_boxes = BoxMode.convert(gt_boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) gt_boxes = Boxes(gt_boxes.to(pred_boxes.device)) ious = pairwise_iou(gt_boxes, pred_boxes) paired_preds = [] for gt_idx, matches in enumerate(ious): if matches.sum() == 0: row = [im_name, "FN", "FN", "non-eval", -1, "NA"] self._partial_results += [row] else: if self.eval_mode == "iou": pred_idx = matches.argmax() if pred_idx not in paired_preds: paired_preds.append(pred_idx) class_out = self._is_polyp_classified( pred_class[pred_idx], instance_gt_annots[gt_idx] ['category_id']) row = [ im_name, det_out, "TP", class_out, scores[pred_idx], pred_boxes[pred_idx] ] self._partial_results += [row] else: row = [ im_name, det_out, "FP", "non-eval", scores[pred_idx], pred_boxes[pred_idx] ] self._partial_results += [row] else: for posible_match in matches.nonzero(): gt_box = gt_boxes.tensor[gt_idx] gt_x1, gt_y1, gt_x2, gt_y2 = gt_box pred_box = pred_boxes.tensor[posible_match] pred_x1, pred_y1, pred_x2, pred_y2 = pred_box.squeeze( ) if self.eval_mode == 'old': pred_cx, pred_cy = ( pred_x1 + (pred_x2 - pred_x1) / 2), ( pred_y1 + (pred_y2 - pred_y1) / 2) eval_condition = ( gt_x1 < pred_cx < gt_x2) and ( gt_y1 < pred_cy < gt_y2) else: gt_cx, gt_cy = ( gt_x1 + (gt_x2 - gt_x1) / 2), ( gt_y1 + (gt_y2 - gt_y1) / 2) eval_condition = ( pred_x1 < gt_cx < pred_x2) and ( pred_y1 < gt_cy < pred_y2) if eval_condition: if posible_match not in paired_preds: paired_preds.append(posible_match) class_out = self._is_polyp_classified( pred_class[posible_match], instance_gt_annots[gt_idx] ['category_id']) row = [ im_name, det_out, "TP", class_out, scores[posible_match], pred_boxes[posible_match] ] self._partial_results += [row] else: row = [ im_name, det_out, "FP", "non-eval", scores[posible_match], pred_boxes[posible_match] ] self._partial_results += [row] # for pred_box, pred_score, pred_classif in zip(pred_boxes, scores, pred_class): # pred_x1, pred_y1, pred_x2, pred_y2 = pred_box # if instance_gt_annots: # for annot_dict in instance_gt_annots: # gt_bbox = annot_dict['bbox'] # xywh # gt_bbox[2] += gt_bbox[0] # gt_bbox[3] += gt_bbox[1] # xyxy # # gt_x1, gt_y1, gt_x2, gt_y2 = gt_bbox # # eval_condition = self._is_localized(gt_bbox, gt_x1, gt_x2, gt_y1, gt_y2, pred_box, # pred_x1, pred_x2, pred_y1, pred_y2) # # if eval_condition: # class_out = self._is_polyp_classified(pred_classif, annot_dict['category_id']) # # row = [im_name, det_out, "TP", class_out, pred_score, pred_box] # self._partial_results += [row] # instance_gt_annots.remove(annot_dict) # break # # else: # row = [im_name, "FP", "FP", "non-eval", pred_score, pred_box] # self._partial_results += [row] else: # No GT but Preds --> FP if len(pred_boxes) > 0: 
for pred_box, pred_score, pred_classif in zip( pred_boxes, scores, pred_class): row = [ im_name, "FP", "FP", "non-eval", pred_score, pred_box ] self._partial_results += [row] # No GT and no Preds --> TN else: row = [im_name, "TN", "TN", "non-eval", -1, "NA"] self._partial_results += [row]
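# --- Illustration (not part of the original code) ---
# Toy sketch of the "iou" evaluation mode above: each GT row of the IoU matrix
# picks its best-overlapping prediction as a TP; GT rows with no overlap become
# FN, and predictions matched to an already-paired GT become FP. The boxes
# below are invented.
import torch
from detectron2.structures import Boxes, BoxMode, pairwise_iou

gt_xywh = torch.tensor([[10., 10., 30., 30.]])                       # one GT box in XYWH
gt = Boxes(BoxMode.convert(gt_xywh, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS))
preds = Boxes(torch.tensor([[12., 12., 38., 38.], [100., 100., 120., 120.]]))
ious = pairwise_iou(gt, preds)                                       # (num_gt, num_pred)
best_pred = ious[0].argmax().item()
print("TP prediction index:", best_pred, "IoU:", round(ious[0, best_pred].item(), 3))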
def get_ground_truth(self, anchors, targets, gt_classification): """ Args: anchors (list[list[Boxes]]): a list of N=#image elements. Each is a list of #feature level Boxes. The Boxes contains anchors of this image on the specific feature level. targets (list[Instances]): a list of N `Instances`s. The i-th `Instances` contains the ground-truth per-instance annotations for the i-th input image. Specify `targets` during training only. Returns: gt_classes (Tensor): An integer tensor of shape (N, R) storing ground-truth labels for each anchor. R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels. Anchors with an IoU with some target higher than the foreground threshold are assigned their corresponding label in the [0, K-1] range. Anchors whose IoU are below the background threshold are assigned the label "K". Anchors whose IoU are between the foreground and background thresholds are assigned a label "-1", i.e. ignore. gt_anchors_deltas (Tensor): Shape (N, R, 4). The last dimension represents ground-truth box2box transform targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. The values in the tensor are meaningful only when the corresponding anchor is labeled as foreground. """ gt_classes = [] gt_anchors_deltas = [] anchors = [Boxes.cat(anchors_i) for anchors_i in anchors] # list[Tensor(R, 4)], one for each image for anchors_per_image, targets_per_image, classification_per_image in zip( anchors, targets, gt_classification): match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes, anchors_per_image) gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix) has_gt = len(targets_per_image) > 0 if has_gt: # ground truth box regression matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs] gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas( anchors_per_image.tensor, matched_gt_boxes.tensor) gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] # Anchors with label 0 are treated as background. gt_classes_i[anchor_labels == 0] = self.num_classes # Anchors with label -1 are ignored. gt_classes_i[anchor_labels == -1] = -1 else: gt_classes_i = torch.zeros_like( gt_matched_idxs) + self.num_classes gt_anchors_reg_deltas_i = torch.zeros_like( anchors_per_image.tensor) # only commodity and model data do object detection, # other type ignore all anchors # object_detection_enable = classification_per_image.gt_classes == 0 \ # or classification_per_image.gt_classes == 1 if not has_gt: # Anchors with label -1 are ignored. gt_classes_i[:] = -1 gt_classes.append(gt_classes_i) gt_anchors_deltas.append(gt_anchors_reg_deltas_i) return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
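# --- Illustration (not part of the original code) ---
# Small numeric example of the regression-target computation used above. The
# (1, 1, 1, 1) weights are a common default, not necessarily what this model
# is configured with; the boxes are arbitrary.
import torch
from detectron2.modeling.box_regression import Box2BoxTransform

box2box = Box2BoxTransform(weights=(1.0, 1.0, 1.0, 1.0))
anchor = torch.tensor([[10., 10., 50., 50.]])
matched_gt = torch.tensor([[12., 8., 60., 52.]])
deltas = box2box.get_deltas(anchor, matched_gt)      # (dx, dy, dw, dh) targets
recovered = box2box.apply_deltas(deltas, anchor)     # round-trips back to the GT box
print(deltas, recovered)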
def _forward_box( self, features: Dict[str, torch.Tensor], proposals: List[Instances], void_proposals: Optional[List[Instances]] = None, image_path=None, flips=None, exemplar_info=None ) -> Union[Dict[str, torch.Tensor], List[Instances]]: """ Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. Args: features (dict[str, Tensor]): mapping from feature map names to tensor. Same as in :meth:`ROIHeads.forward`. proposals (list[Instances]): the per-image object proposals with their matching ground truth. Each has fields "proposal_boxes", and "objectness_logits", "gt_classes", "gt_boxes". Returns: In training, a dict of losses. In inference, a list of `Instances`, the predicted instances. """ features = [features[f] for f in self.box_in_features] box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) box_features = self.box_head(box_features) predictions = self.box_predictor(box_features) if self.training: void_box_features = self.box_pooler( features, [x.proposal_boxes for x in void_proposals]) void_box_features = self.box_head(void_box_features) void_predictions = self.box_predictor(void_box_features) if exemplar_info is not None: with torch.no_grad(): ap = void_proposals[:-1] l = sum([len(e) for e in ap]) lbl = self.box_predictor.add_exemplar( exemplar_info, void_box_features[:l].detach(), ap, image_path[:-1], flips[:-1]) if lbl is not None: for x, l in zip(ap, lbl): x.gt_classes = l del box_features losses = self.box_predictor.losses(predictions, proposals, void_predictions, void_proposals, image_path=image_path, flips=flips, use_exemplar=exemplar_info is not None) # proposals is modified in-place below, so losses must be computed first. if self.train_on_pred_boxes: with torch.no_grad(): pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( predictions, proposals) for proposals_per_image, pred_boxes_per_image in zip( proposals, pred_boxes): proposals_per_image.proposal_boxes = Boxes( pred_boxes_per_image) return losses else: pred_instances, get_inds = self.box_predictor.inference( predictions, proposals, use_unknown=True) del box_features return pred_instances
def func_cat(x: torch.Tensor):
    boxes1 = Boxes(x)
    boxes2 = Boxes(x)
    # boxes3 = Boxes.cat([boxes1, boxes2])  # this is not supported by TorchScript for now.
    boxes3 = boxes1.cat([boxes1, boxes2])
    return boxes3
def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): """ A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) to detectron2's format (i.e. list of Instances instance). This only works when the model follows the Caffe2 detectron's naming convention. Args: image_sizes (List[List[int, int]]): [H, W] of every image. tensor_outputs (Dict[str, Tensor]): external_output to its tensor. force_mask_on (Bool): if true, the it make sure there'll be pred_masks even if the mask is not found from tensor_outputs (usually due to model crash) """ results = [Instances(image_size) for image_size in image_sizes] batch_splits = tensor_outputs.get("batch_splits", None) if batch_splits: raise NotImplementedError() assert len(image_sizes) == 1 result = results[0] bbox_nms = tensor_outputs["bbox_nms"] score_nms = tensor_outputs["score_nms"] class_nms = tensor_outputs["class_nms"] # Detection will always success because Conv support 0-batch assert _is_valid_model_output_blob(bbox_nms) assert _is_valid_model_output_blob(score_nms) assert _is_valid_model_output_blob(class_nms) result.pred_boxes = Boxes(torch.Tensor(bbox_nms)) result.scores = torch.Tensor(score_nms) result.pred_classes = torch.Tensor(class_nms).to(torch.int64) mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) if _is_valid_model_output_blob(mask_fcn_probs): # finish the mask pred mask_probs_pred = torch.Tensor(mask_fcn_probs) num_masks = mask_probs_pred.shape[0] class_pred = result.pred_classes indices = torch.arange(num_masks, device=class_pred.device) mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] result.pred_masks = mask_probs_pred elif force_mask_on: # NOTE: there's no way to know the height/width of mask here, it won't be # used anyway when batch size is 0, so just set them to 0. result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) keypoints_out = tensor_outputs.get("keypoints_out", None) kps_score = tensor_outputs.get("kps_score", None) if _is_valid_model_output_blob(keypoints_out): # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) keypoints_tensor = torch.Tensor(keypoints_out) # NOTE: it's possible that prob is not calculated if "should_output_softmax" # is set to False in HeatmapMaxKeypoint, so just using raw score, seems # it doesn't affect mAP. TODO: check more carefully. keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] result.pred_keypoints = keypoint_xyp elif _is_valid_model_output_blob(kps_score): # keypoint heatmap to sparse data structure pred_keypoint_logits = torch.Tensor(kps_score) keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) return results
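# --- Usage sketch (not part of the original code) ---
# Hypothetical Caffe2-style output blobs for a single 480x640 image, using the
# blob names the function above expects; the numbers are placeholders.
import numpy as np

tensor_outputs_example = {
    "bbox_nms": np.array([[10., 20., 110., 220.]], dtype=np.float32),
    "score_nms": np.array([0.9], dtype=np.float32),
    "class_nms": np.array([3.], dtype=np.float32),
}
results = assemble_rcnn_outputs_by_name([[480, 640]], tensor_outputs_example)
print(results[0].pred_boxes, results[0].scores, results[0].pred_classes)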
def inference_single_image(self, logits, init_boxes, refine_boxes, image_size): boxes_all = [] init_boxes_all = [] class_idxs_all = [] scores_all = [] for logit, init_box, refine_box in zip(logits, init_boxes, refine_boxes): scores, cls = logit.sigmoid().max(0) cls = cls.view(-1) scores = scores.view(-1) init_box = init_box.view(4, -1).permute(1, 0) refine_box = refine_box.view(4, -1).permute(1, 0) predicted_prob, topk_idxs = scores.sort(descending=True) num_topk = min(self.topk_candidates, cls.size(0)) predicted_prob = predicted_prob[:num_topk] topk_idxs = topk_idxs[:num_topk] # filter out the proposals with low confidence score keep_idxs = predicted_prob > self.score_threshold predicted_prob = predicted_prob[keep_idxs] topk_idxs = topk_idxs[keep_idxs] init_box_topk = init_box[topk_idxs] refine_box_topk = refine_box[topk_idxs] cls_topk = cls[topk_idxs] score_topk = scores[topk_idxs] boxes_all.append(refine_box_topk) init_boxes_all.append(init_box_topk) class_idxs_all.append(cls_topk) scores_all.append(score_topk) # The following code is the decoding procedure of RetinaNet in D2. # However, it fails to handle the predictions though I thought it could. """ cls = logit.flatten().sigmoid() # pre nms num_topk = min(self.topk_candidates, cls.size(0)) predicted_prob, topk_idxs = cls.sort(descending=True) predicted_prob = predicted_prob[:num_topk] topk_idxs = topk_idxs[:num_topk] # filter out the proposals with low confidence score keep_idxs = predicted_prob > self.score_threshold predicted_prob = predicted_prob[keep_idxs] topk_idxs = topk_idxs[keep_idxs] points_idxs = topk_idxs // self.num_classes classes_idxs = topk_idxs % self.num_classes init_box = init_box.reshape(4, -1).clone() refine_box = refine_box.reshape(4, -1).clone() init_box = init_box[:, points_idxs].permute(1, 0) refine_box_topk = refine_box[:, points_idxs].permute(1, 0) boxes_all.append(refine_box_topk) init_boxes_all.append(init_box) class_idxs_all.append(classes_idxs) scores_all.append(predicted_prob) """ boxes_all, scores_all, class_idxs_all, init_boxes_all = [ cat(x) for x in [boxes_all, scores_all, class_idxs_all, init_boxes_all] ] keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold) keep = keep[:self.max_detections_per_image] result = Instances(image_size) result.pred_boxes = Boxes(boxes_all[keep]) result.scores = scores_all[keep] result.pred_classes = class_idxs_all[keep] result.init_boxes = init_boxes_all[keep] return result
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     class_logits=None,
                                     estimate_uncertainty=False,
                                     variance=torch.Tensor([])):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    # Get box ID with predicted class label: [box id, class label]
    filter_inds = filter_mask.nonzero()

    import numpy as np
    class_id = np.argmax(scores.cpu().numpy(), axis=1)
    # Pair every prediction index with its top-scoring class
    # (was hard-coded to 1000 proposals, which breaks for any other count).
    class_id = np.array([np.arange(scores.shape[0]), class_id])
    class_id = np.swapaxes(class_id, 1, 0)
    boxes_one_class = boxes[class_id[:, 0], class_id[:, 1], :].cpu().numpy()
    scores_one_class = np.max(scores.cpu().numpy(), axis=1)

    if class_logits is not None:
        class_logits = class_logits[filter_inds[:, 0]]
        predicted_probs = scores[filter_inds[:, 0]]

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores_filtered = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores_filtered, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes_final, scores_final, filter_inds_final = (
        boxes[keep], scores_filtered[keep], filter_inds[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes_final)
    result.scores = scores_final
    result.pred_classes = filter_inds_final[:, 1]

    # Save out logits
    if class_logits is not None:
        # result.class_logits = class_logits[filter_inds_final[:, 0]]
        result.class_logits = class_logits[keep]
        result.prob_score = predicted_probs[keep]

    if estimate_uncertainty:
        # std from 1000 proposals
        # stds = nms_calc_uncertainty(boxes_final.cpu().numpy(), scores_final.cpu().numpy(),
        #                             boxes_one_class, scores_one_class, 0.75)
        # std from bbox with class confidence score higher than threshold
        stds = nms_calc_uncertainty(boxes_final.cpu().numpy(),
                                    scores_final.cpu().numpy(),
                                    boxes.cpu().numpy(),
                                    scores_filtered.cpu().numpy(), 0.9)
        result.stds = torch.Tensor(stds).cuda()

    if len(variance) > 0:
        result.vars = variance[keep]

    return result, filter_inds_final[:, 0]
def ga_shape_targets(self, approxs, inside_flags, squares, gt_instances):
    assert len(approxs) == len(inside_flags) == len(squares)
    approxs_flatten = Boxes.cat(approxs)
    inside_flags_flatten = torch.cat(inside_flags)
    squares_flatten = Boxes.cat(squares)
def evaluate(cfg, evaluator, det_1, det_2, anno, predictor, method): evaluator.reset() img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/' num_img = len(det_2['image']) count_1 = 0 count_2 = 0 count_fusion = 0 print('Method: ', method) img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/' num_img = len(det_2['image']) count_1 = 0 count_2 = 0 count_fusion = 0 X = None Y = np.array([]) cnt = 0 for i in range(num_img): info_1 = {} info_1['img_name'] = det_1['image'][i] info_1['bbox'] = det_1['boxes'][i] info_1['score'] = det_1['scores'][i] info_1['class'] = det_1['classes'][i] info_1['class_logits'] = det_1['class_logits'][i] if 'probs' in det_1.keys(): info_1['prob'] = det_1['probs'][i] info_2 = {} info_2['img_name'] = det_2['image'][i].split('.')[0] + '.jpeg' info_2['bbox'] = det_2['boxes'][i] info_2['score'] = det_2['scores'][i] info_2['class'] = det_2['classes'][i] info_2['class_logits'] = det_2['class_logits'][i] if 'probs' in det_2.keys(): info_2['prob'] = det_2['probs'][i] #img_id = int(info_1['img_name'].split('.')[0].split('_')[1]) - 1 img_id = det_1['image_id'][i] box_gt = [] class_gt = [] info_gt = {} #print('img_id:',img_id) if img_id in anno.keys(): # Handle groundtruth anno_gt = anno[img_id] for j in range(len(anno_gt)): box = anno_gt[j]['bbox'] box_gt.append( [box[0], box[1], box[0] + box[2], box[1] + box[3]]) class_gt.append(anno_gt[j]['category_id']) info_gt['bbox'] = box_gt info_gt['class'] = class_gt # If no any detection in two results if len(info_1['bbox']) == 0 and len(info_2['bbox']) == 0: continue # If no detection in 1st model: elif len(info_1['bbox']) == 0: print('model 1 miss detected') in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt_1_det( info_2) score_results, class_results, box_results = nms_multiple_box( in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method) #class_results, score_results, box_results = match_box_nms(in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method) elif len(info_2['bbox']) == 0: print('model 2 miss detected') in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt_1_det( info_1) score_results, class_results, box_results = nms_multiple_box( in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method) #class_results, score_results, box_results = match_box_nms(in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method) else: in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt( info_1, info_2) score_results, class_results, box_results = nms_multiple_box( in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method) #class_results, score_results, box_results = match_box_nms(in_boxes, in_scores, in_class, in_logits, 0.5, num_det, method) pred_prob_multiclass = predictor.predict_proba(score_results) out_scores = np.max(pred_prob_multiclass, axis=1) out_class = np.argmax(pred_prob_multiclass, axis=1) """ Send information to evaluator """ # Image info file_name = img_folder + info_1['img_name'].split('.')[0] + '.jpeg' img = cv2.imread(file_name) H, W, _ = img.shape # Handle inputs inputs = [] input_info = {} input_info['file_name'] = file_name input_info['height'] = H input_info['width'] = W input_info['image_id'] = det_1['image_id'][i] input_info['image'] = torch.Tensor(img) inputs.append(input_info) # Handle outputs outputs = [] out_info = {} proposals = Instances([H, W]) proposals.pred_boxes = Boxes(box_results) proposals.scores = torch.Tensor(out_scores) proposals.pred_classes = torch.Tensor(out_class) out_info['instances'] = proposals 
outputs.append(out_info) evaluator.process(inputs, outputs) if len(score_results): if cnt == 0: X = score_results else: try: X = np.concatenate((X, score_results)) except: pdb.set_trace() Y = np.concatenate((Y, class_results)) cnt += 1 else: continue results = evaluator.evaluate(out_eval_path='FLIR_pooling_.out') if results is None: results = {} avgRGB = count_1 / num_img avgThermal = count_2 / num_img avgNMS = count_fusion / num_img print('Avg bbox for RGB:', avgRGB, "average count thermal:", avgThermal, 'average count nms:', avgNMS) return results
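# --- Illustration (not part of the original code) ---
# Minimal sketch of one (inputs, outputs) pair in the format handed to
# `evaluator.process` above; the file name, image size, boxes, and scores are
# placeholders.
import torch
from detectron2.structures import Boxes, Instances

H, W = 512, 640
inputs_example = [{"file_name": "FLIR_00001.jpeg", "height": H, "width": W, "image_id": 1}]
pred = Instances((H, W))
pred.pred_boxes = Boxes(torch.tensor([[30., 40., 120., 160.]]))
pred.scores = torch.tensor([0.85])
pred.pred_classes = torch.tensor([0])
outputs_example = [{"instances": pred}]
# evaluator.process(inputs_example, outputs_example)  # e.g. a COCOEvaluator for the dataset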
def convert_to_coco_dict(dataset_name): """ Convert an instance detection/segmentation or keypoint detection dataset in detectron2's standard format into COCO json format. Generic dataset description can be found here: https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset COCO data format description can be found here: http://cocodataset.org/#format-data Args: dataset_name (str): name of the source dataset Must be registered in DatastCatalog and in detectron2's standard format. Must have corresponding metadata "thing_classes" Returns: coco_dict: serializable dict in COCO json format """ dataset_dicts = DatasetCatalog.get(dataset_name) metadata = MetadataCatalog.get(dataset_name) # unmap the category mapping ids for COCO if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): reverse_id_mapping = { v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items() } reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[ contiguous_id] # noqa else: reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa categories = [{ "id": reverse_id_mapper(id), "name": name } for id, name in enumerate(metadata.thing_classes)] logger.info("Converting dataset dicts into COCO format") coco_images = [] coco_annotations = [] for image_id, image_dict in enumerate(dataset_dicts): coco_image = { "id": image_dict.get("image_id", image_id), "width": image_dict["width"], "height": image_dict["height"], "file_name": image_dict["file_name"], } coco_images.append(coco_image) anns_per_image = image_dict.get("annotations", []) for annotation in anns_per_image: # create a new dict with only COCO fields coco_annotation = {} # COCO requirement: XYWH box format bbox = annotation["bbox"] bbox_mode = annotation["bbox_mode"] bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS) # COCO requirement: instance area if "segmentation" in annotation: # Computing areas for instances by counting the pixels segmentation = annotation["segmentation"] # TODO: check segmentation type: RLE, BinaryMask or Polygon if isinstance(segmentation, list): polygons = PolygonMasks([segmentation]) area = polygons.area()[0].item() elif isinstance(segmentation, dict): # RLE area = mask_util.area(segmentation).item() else: raise TypeError( f"Unknown segmentation type {type(segmentation)}!") else: # Computing areas using bounding boxes bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) area = Boxes([bbox_xy]).area()[0].item() if "keypoints" in annotation: keypoints = annotation["keypoints"] # list[int] for idx, v in enumerate(keypoints): if idx % 3 != 2: # COCO's segmentation coordinates are floating points in [0, H or W], # but keypoint coordinates are integers in [0, H-1 or W-1] # For COCO format consistency we substract 0.5 # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 keypoints[idx] = v - 0.5 if "num_keypoints" in annotation: num_keypoints = annotation["num_keypoints"] else: num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) # COCO requirement: # linking annotations to images # "id" field must start with 1 coco_annotation["id"] = len(coco_annotations) + 1 coco_annotation["image_id"] = coco_image["id"] coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] coco_annotation["area"] = float(area) coco_annotation["iscrowd"] = annotation.get("iscrowd", 0) coco_annotation["category_id"] = reverse_id_mapper( annotation["category_id"]) # Add optional fields if "keypoints" in annotation: coco_annotation["keypoints"] = keypoints coco_annotation["num_keypoints"] 
= num_keypoints if "segmentation" in annotation: seg = coco_annotation["segmentation"] = annotation[ "segmentation"] if isinstance(seg, dict): # RLE counts = seg["counts"] if not isinstance(counts, str): # make it json-serializable seg["counts"] = counts.decode("ascii") coco_annotations.append(coco_annotation) logger.info( "Conversion finished, " f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}") info = { "date_created": str(datetime.datetime.now()), "description": "Automatically generated COCO json file for Detectron2.", } coco_dict = { "info": info, "images": coco_images, "categories": categories, "licenses": None } if len(coco_annotations) > 0: coco_dict["annotations"] = coco_annotations return coco_dict
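# --- Usage sketch (not part of the original code) ---
# Registering a tiny, invented dataset and serializing it with the converter
# above; "my_tiny_dataset", the record contents, and the output path are all
# hypothetical.
import json
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def _tiny_dataset():
    return [{
        "file_name": "img_0.jpg", "image_id": 0, "height": 480, "width": 640,
        "annotations": [{
            "bbox": [10.0, 20.0, 100.0, 200.0], "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 0, "iscrowd": 0,
        }],
    }]

DatasetCatalog.register("my_tiny_dataset", _tiny_dataset)
MetadataCatalog.get("my_tiny_dataset").thing_classes = ["widget"]
coco_dict = convert_to_coco_dict("my_tiny_dataset")
with open("my_tiny_dataset_coco.json", "w") as f:
    json.dump(coco_dict, f)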
def forward(self, proposal_deltas, proposal_boxes):
    instances = Instances((10, 10))
    instances.proposal_boxes = Boxes(proposal_boxes)
    return self._output_layer.predict_boxes(
        (None, proposal_deltas), [instances])
def inference_single_image(self, cate_preds, kernel_preds, seg_preds, cur_size, ori_size): # overall info. h, w = cur_size f_h, f_w = seg_preds.size()[-2:] ratio = math.ceil(h / f_h) upsampled_size_out = (int(f_h * ratio), int(f_w * ratio)) # process. inds = (cate_preds > self.score_threshold) cate_scores = cate_preds[inds] if len(cate_scores) == 0: results = Instances(ori_size) results.scores = torch.tensor([]) results.pred_classes = torch.tensor([]) results.pred_masks = torch.tensor([]) results.pred_boxes = Boxes(torch.tensor([])) return results # cate_labels & kernel_preds inds = inds.nonzero() cate_labels = inds[:, 1] kernel_preds = kernel_preds[inds[:, 0]] # trans vector. size_trans = cate_labels.new_tensor(self.num_grids).pow(2).cumsum(0) strides = kernel_preds.new_ones(size_trans[-1]) n_stage = len(self.num_grids) strides[:size_trans[0]] *= self.instance_strides[0] for ind_ in range(1, n_stage): strides[size_trans[ind_ - 1]:size_trans[ind_]] *= self.instance_strides[ ind_] strides = strides[inds[:, 0]] # mask encoding. N, I = kernel_preds.shape kernel_preds = kernel_preds.view(N, I, 1, 1) seg_preds = F.conv2d(seg_preds, kernel_preds, stride=1).squeeze(0).sigmoid() # mask. seg_masks = seg_preds > self.mask_threshold sum_masks = seg_masks.sum((1, 2)).float() # filter. keep = sum_masks > strides if keep.sum() == 0: results = Instances(ori_size) results.scores = torch.tensor([]) results.pred_classes = torch.tensor([]) results.pred_masks = torch.tensor([]) results.pred_boxes = Boxes(torch.tensor([])) return results seg_masks = seg_masks[keep, ...] seg_preds = seg_preds[keep, ...] sum_masks = sum_masks[keep] cate_scores = cate_scores[keep] cate_labels = cate_labels[keep] # mask scoring. seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks cate_scores *= seg_scores # sort and keep top nms_pre sort_inds = torch.argsort(cate_scores, descending=True) if len(sort_inds) > self.max_before_nms: sort_inds = sort_inds[:self.max_before_nms] seg_masks = seg_masks[sort_inds, :, :] seg_preds = seg_preds[sort_inds, :, :] sum_masks = sum_masks[sort_inds] cate_scores = cate_scores[sort_inds] cate_labels = cate_labels[sort_inds] if self.nms_type == "matrix": # matrix nms & filter. cate_scores = matrix_nms(cate_labels, seg_masks, sum_masks, cate_scores, sigma=self.nms_sigma, kernel=self.nms_kernel) keep = cate_scores >= self.update_threshold elif self.nms_type == "mask": # original mask nms. keep = mask_nms(cate_labels, seg_masks, sum_masks, cate_scores, nms_thr=self.mask_threshold) else: raise NotImplementedError if keep.sum() == 0: results = Instances(ori_size) results.scores = torch.tensor([]) results.pred_classes = torch.tensor([]) results.pred_masks = torch.tensor([]) results.pred_boxes = Boxes(torch.tensor([])) return results seg_preds = seg_preds[keep, :, :] cate_scores = cate_scores[keep] cate_labels = cate_labels[keep] # sort and keep top_k sort_inds = torch.argsort(cate_scores, descending=True) if len(sort_inds) > self.max_per_img: sort_inds = sort_inds[:self.max_per_img] seg_preds = seg_preds[sort_inds, :, :] cate_scores = cate_scores[sort_inds] cate_labels = cate_labels[sort_inds] # reshape to original size. 
seg_preds = F.interpolate(seg_preds.unsqueeze(0), size=upsampled_size_out, mode='bilinear')[:, :, :h, :w] seg_masks = F.interpolate(seg_preds, size=ori_size, mode='bilinear').squeeze(0) seg_masks = seg_masks > self.mask_threshold results = Instances(ori_size) results.pred_classes = cate_labels results.scores = cate_scores results.pred_masks = seg_masks # get bbox from mask pred_boxes = torch.zeros(seg_masks.size(0), 4) #for i in range(seg_masks.size(0)): # mask = seg_masks[i].squeeze() # ys, xs = torch.where(mask) # pred_boxes[i] = torch.tensor([xs.min(), ys.min(), xs.max(), ys.max()]).float() results.pred_boxes = Boxes(pred_boxes) return results
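# --- Sketch (not part of the original code) ---
# The commented-out loop above is skipped, so `pred_boxes` stays all zeros.
# If boxes are needed downstream, a helper along these lines (essentially the
# commented loop plus a guard against empty masks) could fill them in.
import torch

def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
    """(N, H, W) boolean masks -> (N, 4) XYXY float boxes; empty masks give zeros."""
    boxes = torch.zeros(masks.size(0), 4, dtype=torch.float32, device=masks.device)
    for i, mask in enumerate(masks):
        ys, xs = torch.where(mask)
        if xs.numel() > 0:
            boxes[i] = torch.stack([xs.min(), ys.min(), xs.max(), ys.max()]).float()
    return boxes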
def get_empty_instance(h, w):
    inst = Instances((h, w))
    inst.gt_boxes = Boxes(torch.rand(0, 4))
    inst.gt_classes = torch.tensor([]).to(dtype=torch.int64)
    inst.gt_masks = BitMasks(torch.rand(0, h, w))
    return inst
def inference(self, pred_digits,pred_points,ins_feature,images): """ Arguments: pred_digits, pred_points: Same as the output of: images (ImageList): the input images Returns: results (List[Instances]): a list of #images elements. """ batch=pred_digits.size(0) pred_digits=pred_digits.sigmoid_() results=[] pool_digits=F.max_pool2d(pred_digits,3,1,1) for img_idx in range(batch): # Get the size of the current image image_size = images.image_sizes[img_idx] digits_im = pred_digits[img_idx] pool_digits_im=pool_digits[img_idx] points_im=pred_points[img_idx] # print(points_im[:,15,15].view(-1,2).cpu().numpy()) Index=torch.nonzero((digits_im==pool_digits_im) & (digits_im>self.score_threshold)) results_im=Instances(image_size) if Index.size(0)<1: results_im.pred_classes = Index.new_zeros(0) results_im.pred_boxes = Boxes(points_im.new_zeros(0,4)) results_im.scores = digits_im.new_zeros(0) results_im.pred_points=points_im.new_zeros(0,points_im.size(0)//2,2) results.append(results_im) continue cls_idxs=Index[:,0] pred_prob=digits_im[Index[:,0],Index[:,1],Index[:,2]] center=torch.cat([Index[:,2:3],Index[:,1:2]],dim=1) points_n_yx = points_im[:,Index[:,1],Index[:,2]] points_n=points_n_yx.clone().detach() points_n[::2,:]=points_n_yx[1::2,:] points_n[1::2,:]=points_n_yx[::2,:] # print(points_n,points_n.size()) N=center.size(0) # print(N) TOPK=100 if N>TOPK: pred_prob, topk_idxs = pred_prob.sort(descending=True) # Keep top k scoring values pred_prob = pred_prob[:TOPK] # Keep top k values center = center[topk_idxs[:TOPK],:] points_n = points_n[:,topk_idxs[:TOPK]] cls_idxs=cls_idxs[topk_idxs[:TOPK]] N=TOPK center=center.view(N,1,2) npoints=torch.transpose(points_n,1,0) npoints=npoints.view(N,-1,2) real_npoints=npoints+center real_npoints=real_npoints*self.points_feature_strides[-1] location=(real_npoints[:,:,(1,0)]/self.ins_feature_strides[0]).float() batch_index=Index.new_zeros(N)+img_idx pred_ins=self.ins_head(ins_feature,location,batch_index) pred_ins=F.interpolate(pred_ins,scale_factor=self.ins_feature_strides[0],mode='bilinear').squeeze(1) pred_masks=(pred_ins>0.5) #crop to the image size: pred_masks=pred_masks[:,:image_size[0],:image_size[1]] top_left,_=torch.min(real_npoints,dim=1) bottom_right,_=torch.max(real_npoints,dim=1) bbox=torch.cat([top_left,bottom_right],dim=1) # print(pred_prob,center,bbox) results_im.pred_classes = cls_idxs results_im.pred_boxes = Boxes(bbox) results_im.scores = pred_prob results_im.pred_points=real_npoints results_im.pred_masks=pred_masks results.append(results_im) return results
def forward_for_single_feature_map(self, locations, box_cls, reg_pred, ctrness, image_sizes):
    N, C, H, W = box_cls.shape

    # put in the same format as locations
    box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
    box_cls = box_cls.reshape(N, -1, C).sigmoid()
    box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1)
    box_regression = box_regression.reshape(N, -1, 4)
    ctrness = ctrness.view(N, 1, H, W).permute(0, 2, 3, 1)
    ctrness = ctrness.reshape(N, -1).sigmoid()

    # if self.thresh_with_ctr is True, we multiply the classification
    # scores with centerness scores before applying the threshold.
    if self.thresh_with_ctr:
        box_cls = box_cls * ctrness[:, :, None]
    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    if not self.thresh_with_ctr:
        box_cls = box_cls * ctrness[:, :, None]

    results = []
    for i in range(N):
        per_box_cls = box_cls[i]
        per_candidate_inds = candidate_inds[i]
        per_box_cls = per_box_cls[per_candidate_inds]

        per_candidate_nonzeros = per_candidate_inds.nonzero()
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1]

        per_box_regression = box_regression[i]
        per_box_regression = per_box_regression[per_box_loc]
        per_locations = locations[per_box_loc]

        per_pre_nms_top_n = pre_nms_top_n[i]

        if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
            per_box_cls, top_k_indices = per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_class = per_class[top_k_indices]
            per_box_regression = per_box_regression[top_k_indices]
            per_locations = per_locations[top_k_indices]

        detections = torch.stack([
            per_locations[:, 0] - per_box_regression[:, 0],
            per_locations[:, 1] - per_box_regression[:, 1],
            per_locations[:, 0] + per_box_regression[:, 2],
            per_locations[:, 1] + per_box_regression[:, 3],
        ], dim=1)

        boxlist = Instances(image_sizes[i])
        boxlist.pred_boxes = Boxes(detections)
        boxlist.scores = torch.sqrt(per_box_cls)
        boxlist.pred_classes = per_class
        boxlist.locations = per_locations
        results.append(boxlist)

    return results
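# Illustrative check (not part of the original source): the torch.stack above decodes an
# FCOS-style (l, t, r, b) regression at a location (x, y) into an XYXY box as
# (x - l, y - t, x + r, y + b). For example, a location at (100, 60) with distances
# (10, 20, 30, 40) yields the box (90, 40, 130, 100).
location = torch.tensor([[100.0, 60.0]])
ltrb = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
box = torch.stack([
    location[:, 0] - ltrb[:, 0],
    location[:, 1] - ltrb[:, 1],
    location[:, 0] + ltrb[:, 2],
    location[:, 1] + ltrb[:, 3],
], dim=1)
assert box.tolist() == [[90.0, 40.0, 130.0, 100.0]]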
def inference_single_image(self, locations, box_cls, box_reg, center_score, image_size):
    boxes_all = []
    scores_all = []
    class_idxs_all = []

    # Iterate over every feature level
    for box_cls_i, box_reg_i, locs_i, center_score_i in zip(
            box_cls, box_reg, locations, center_score):
        # (HxW, C)
        box_cls_i = box_cls_i.sigmoid_()
        keep_idxs = box_cls_i > self.pre_nms_thresh

        # multiply the classification scores with center scores
        box_cls_i *= center_score_i.sigmoid_()

        box_cls_i = box_cls_i[keep_idxs]
        keep_idxs_nonzero_i = keep_idxs.nonzero()

        box_loc_i = keep_idxs_nonzero_i[:, 0]
        class_i = keep_idxs_nonzero_i[:, 1]

        box_reg_i = box_reg_i[box_loc_i]
        locs_i = locs_i[box_loc_i]

        per_pre_nms_top_n = keep_idxs.sum().clamp(max=self.pre_nms_top_n)
        if keep_idxs.sum().item() > per_pre_nms_top_n.item():
            box_cls_i, topk_idxs = box_cls_i.topk(per_pre_nms_top_n, sorted=False)
            class_i = class_i[topk_idxs]
            box_reg_i = box_reg_i[topk_idxs]
            locs_i = locs_i[topk_idxs]

        # predict boxes
        predicted_boxes = torch.stack([
            locs_i[:, 0] - box_reg_i[:, 0],
            locs_i[:, 1] - box_reg_i[:, 1],
            locs_i[:, 0] + box_reg_i[:, 2],
            locs_i[:, 1] + box_reg_i[:, 3],
        ], dim=1)
        box_cls_i = torch.sqrt(box_cls_i)

        boxes_all.append(predicted_boxes)
        scores_all.append(box_cls_i)
        class_idxs_all.append(class_i)

    boxes_all, scores_all, class_idxs_all = [
        cat(x) for x in [boxes_all, scores_all, class_idxs_all]
    ]

    # Apply per-class nms for each image
    keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_thresh)
    keep = keep[:self.max_detections_per_image]

    result = Instances(image_size)
    result.pred_boxes = Boxes(boxes_all[keep])
    result.scores = scores_all[keep]
    result.pred_classes = class_idxs_all[keep]
    return result
def __call__(self, values):
    return Boxes(values[0])
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True;
    otherwise, return the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS.
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchors for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(itertools.count(), proposals, pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4
        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ), level_id, dtype=torch.int64, device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], level_ids[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)

        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
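# Hedged usage sketch (not from the original source): exercises find_top_rpn_proposals
# above with random single-level inputs. It assumes detectron2's ImageList plus the
# Boxes/Instances/cat/batched_nms/itertools names already used in this file; shapes
# follow the docstring, and the dummy values are purely illustrative.
import torch
from detectron2.structures import ImageList

images = ImageList.from_tensors([torch.zeros(3, 64, 80)])  # one 64x80 image
dummy_proposals = [torch.rand(1, 200, 4) * 64]              # (N, Hi*Wi*A, 4)
dummy_logits = [torch.randn(1, 200)]                         # (N, Hi*Wi*A)
out = find_top_rpn_proposals(
    dummy_proposals, dummy_logits, images,
    nms_thresh=0.7, pre_nms_topk=100, post_nms_topk=50,
    min_box_side_len=0.0, training=False,
)
print(len(out[0]), out[0].proposal_boxes.tensor.shape)       # at most 50 proposals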
def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official LVIS API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],    # all
        [0 ** 2, 32 ** 2],     # small
        [32 ** 2, 96 ** 2],    # medium
        [96 ** 2, 1e5 ** 2],   # large
        [96 ** 2, 128 ** 2],   # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
        anno = lvis_api.load_anns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
def test_empty_cat(self):
    x = Boxes.cat([])
    # assertTrue(shape, (0, 4)) would always pass because the second argument is
    # treated as a message; use an equality assertion instead.
    self.assertEqual(tuple(x.tensor.shape), (0, 4))