def postprocess_detections(
    self,
    class_logits: Tensor,
    box_regression: Tensor,
    proposals: List[Tensor],
    image_shapes: List[Tuple[int, int]],
) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]:
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO: remove this when ONNX supports dynamic split sizes
        pred_boxes = (pred_boxes,)
        pred_scores = (pred_scores,)
    else:
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
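Every variant in this collection ends with the same per-image tail: score threshold, remove_small_boxes, class-aware batched_nms, then a top-k slice. A minimal, self-contained sketch of that shared pattern follows; the thresholds and dummy data are illustrative assumptions, not taken from any snippet here.

import torch
from torchvision.ops import boxes as box_ops

def filter_detections(boxes, scores, labels,
                      score_thresh=0.05, nms_thresh=0.5, topk=100):
    # remove low-scoring boxes
    inds = torch.where(scores > score_thresh)[0]
    boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
    # remove degenerate (near-empty) boxes
    keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
    boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
    # NMS done independently per class via the labels argument
    keep = box_ops.batched_nms(boxes, scores, labels, nms_thresh)
    # batched_nms returns indices sorted by decreasing score, so slicing gives top-k
    keep = keep[:topk]
    return boxes[keep], scores[keep], labels[keep]

# usage with dummy data: two overlapping boxes of class 1 and one box of class 2
boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
labels = torch.tensor([1, 1, 2])
print(filter_detections(boxes, scores, labels))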
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    objectness_prob = torch.sigmoid(objectness)

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

        # remove small boxes
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # remove low scoring boxes
        # use >= for backwards compatibility
        keep = torch.where(scores >= self.score_thresh)[0]
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def postprocess_detections(
    self,
    pred_scores,   # type: Tensor
    pred_boxes,    # type: Tensor
    proposals,     # type: List[Tensor]
    image_shapes,  # type: List[Tuple[int, int]]
):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    # device = class_logits.device
    # num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # drop the background-class scores
        scores = scores[:, 1:]
        # labels = labels[:, 1:]
        scores, labels = scores.max(dim=1)
        labels += 1  # object labels start from 1

        # remove low scoring boxes
        inds = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def generate_anchors(self, x: Tensor) -> None:
    anchors = torch.cat([
        _generate_anchors(self.input_size, x.size(-1), listify(anchor_sizes), self.aspect_ratios, stride)
        for anchor_sizes, stride in zip(self.anchor_sizes, self.strides)
    ], dim=0)

    # Filter anchors
    anchors = box_ops.clip_boxes_to_image(anchors, (self.input_size, self.input_size))
    keep = box_ops.remove_small_boxes(anchors, 1e-3)
    self.anchors = anchors[keep]
def filter_predictions(self, boxes, labels, scores):
    inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
    boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

    # remove empty boxes
    keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
    boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

    # non-maximum suppression, independently done per class
    keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)

    # keep only topk scoring predictions
    keep = keep[:self.num_boxes_per_img]
    boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
    return boxes, scores, labels
def filter_proposals_patch(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
    print("patch:filter_proposals")
    num_images = proposals.shape[0]
    device = proposals.device
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)  # this call also goes through the patch

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
        # boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
        boxes = clip_boxes_to_image_patch(boxes, img_shape)
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def crop(img: Image, target: Dict[str, Any], region: Tuple[int]) -> Tuple[Image, Dict[str, Any]]:
    """
    Args:
        region: [Top, Left, H, W]
    """
    # crop image
    src_w, src_h = img.size
    img = TF.crop(img, *region)
    target = deepcopy(target)
    top, left, h, w = region

    # set new image size
    if "size" in target.keys():
        target["size"] = (h, w)

    fields: List[str] = list()
    for k, v in target.items():
        if isinstance(v, Tensor):
            fields.append(k)

    # crop bounding boxes
    if "boxes" in target:
        boxes = target["boxes"]
        boxes[:, [0, 2]] *= src_w
        boxes[:, [1, 3]] *= src_h
        boxes = box_op.box_convert(boxes, "cxcywh", "xyxy")
        boxes -= torch.tensor([left, top, left, top])
        boxes = box_op.clip_boxes_to_image(boxes, (h, w))
        keep = box_op.remove_small_boxes(boxes, 1)
        boxes[:, [0, 2]] /= w
        boxes[:, [1, 3]] /= h
        boxes = box_op.box_convert(boxes, "xyxy", "cxcywh")
        target["boxes"] = boxes
        for field in fields:
            target[field] = target[field][keep]

    if "masks" in target:
        target['masks'] = target['masks'][:, top:top + h, left:left + w]
        keep = target['masks'].flatten(1).any(1)
        for field in fields:
            target[field] = target[field][keep]

    return img, target
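A hypothetical usage of the crop helper above, assuming boxes are stored normalized in cxcywh format (as its body implies); the dummy image, box, and label values are illustrative only.

import torch
from PIL import Image

# 100x80 image with one normalized cxcywh box; crop region is (top=10, left=20, h=50, w=60)
img = Image.new("RGB", (100, 80))
target = {
    "size": (80, 100),
    "boxes": torch.tensor([[0.5, 0.5, 0.2, 0.25]]),  # cxcywh, normalized to image size
    "labels": torch.tensor([3]),
}
img, target = crop(img, target, (10, 20, 50, 60))
print(img.size)         # (60, 50): PIL reports (width, height)
print(target["boxes"])  # tensor([[0.5000, 0.6000, 0.3333, 0.4000]]), re-normalized to the crop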
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
        # For onnx export, Clip's min/max cannot be traced as tensors.
        if torchvision._is_tracing():
            boxes = _onnx_clip_boxes_to_image(boxes, img_shape)
        else:
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    batch_idx = torch.arange(num_images, device=device)[:, None]
    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

        # non-maximum suppression, independently done per level
        # lvl = torch.tensor(np.arange(len(lvl))).to(device)
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n]
        boxes, scores = boxes[keep], scores[keep]

        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
def postprocess_boxes(
    self,
    class_logits,
    box_regression,
    embeddings,
    proposals,
    image_shapes,
    fcs=None,
    gt_det=None,
    cws=True,
):
    """
    Similar to RoIHeads.postprocess_detections, but can handle embeddings and
    implement First Classification Score (FCS).
    """
    device = class_logits.device

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    if fcs is not None:
        # First Classification Score (FCS)
        pred_scores = fcs[0]
    else:
        pred_scores = torch.sigmoid(class_logits)
    if cws:
        # Confidence Weighted Similarity (CWS)
        embeddings = embeddings * pred_scores.view(-1, 1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)
    pred_embeddings = embeddings.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_embeddings = []
    for boxes, scores, embeddings, image_shape in zip(pred_boxes, pred_scores, pred_embeddings, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.ones(scores.size(0), device=device)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores.unsqueeze(1)
        labels = labels.unsqueeze(1)

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()
        embeddings = embeddings.reshape(-1, self.embedding_head.dim)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels, embeddings = (
            boxes[inds],
            scores[inds],
            labels[inds],
            embeddings[inds],
        )

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, embeddings = (
            boxes[keep],
            scores[keep],
            labels[keep],
            embeddings[keep],
        )

        if gt_det is not None:
            # include GT into the detection results
            boxes = torch.cat((boxes, gt_det["boxes"]), dim=0)
            labels = torch.cat((labels, torch.tensor([1.0]).to(device)), dim=0)
            scores = torch.cat((scores, torch.tensor([1.0]).to(device)), dim=0)
            embeddings = torch.cat((embeddings, gt_det["embeddings"]), dim=0)

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels, embeddings = (
            boxes[keep],
            scores[keep],
            labels[keep],
            embeddings[keep],
        )

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_embeddings.append(embeddings)

    return all_boxes, all_scores, all_embeddings, all_labels
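The FCS/CWS logic above comes from person search: with FCS the first-stage classification score replaces the second-stage one, and with CWS each embedding is scaled by its detection confidence, so low-confidence gallery detections contribute proportionally less to similarity. A small sketch of the CWS effect with made-up shapes and values:

import torch
import torch.nn.functional as F

gallery = F.normalize(torch.randn(5, 256), dim=1)    # 5 detections, 256-d embeddings
det_scores = torch.tensor([0.9, 0.7, 0.5, 0.3, 0.1])
gallery_cws = gallery * det_scores.view(-1, 1)       # confidence-weighted embeddings
query = F.normalize(torch.randn(256), dim=0)
sim = gallery_cws @ query                            # similarity scaled by detection confidence
print(sim)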
def roi_postprocess_detections(self, class_logits, box_regression, proposals, image_shapes, *extra_tensors):
    """Hack into the torchvision model to obtain features for training the
    caption model; training is assumed to be False.
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/roi_heads.py
    """
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO: remove this when ONNX supports dynamic split sizes
        # and just assign to pred_boxes instead of pred_boxes_list
        pred_boxes_list = [pred_boxes]
        pred_scores_list = [pred_scores]
        extra_tensors_list = [[x] for x in extra_tensors]
    else:
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)
        extra_tensors_list = [x.split(boxes_per_image, 0) for x in extra_tensors]

    all_boxes = []
    all_scores = []
    all_labels = []
    all_extras = [[] for _ in extra_tensors]
    for boxes, scores, image_shape, *extras in zip(pred_boxes_list, pred_scores_list, image_shapes, *extra_tensors_list):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
        # each feature vector is used for all 91 class predictions
        # (90 classes, minus the background), so take the feature vector
        # corresponding to each surviving class prediction
        extras = [x[inds // (num_classes - 1)] for x in extras]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        extras = [x[keep] for x in extras]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        extras = [x[keep] for x in extras]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        for x, y in zip(all_extras, extras):
            x.append(y)

    return [all_boxes, all_scores, all_labels, *all_extras]
def box_features_hook(self, module, input, output):
    '''hook for extracting features from MaskRCNN'''
    features, proposals, image_shapes, targets = input
    box_features = module.box_roi_pool(features, proposals, image_shapes)
    box_features = module.box_head(box_features)
    class_logits, box_regression = module.box_predictor(box_features)

    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = module.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_keeps = []
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()

        # remove low scoring boxes
        inds = torch.nonzero(scores > module.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, module.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.mask_rcnn_top_k_boxes]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_keeps.append(keep)

    box_features_per_image = []
    for keep in all_keeps:
        box_features_per_image.append(box_features[keep])
    self.detection_box_features = box_features_per_image
    self.fpn_pooled_features = self.avg2dpool(features['pool']).squeeze(-1).squeeze(-1)
def postprocess_detections(self, pred_scores, box_regression, embeddings_, proposals, image_shapes):
    device = pred_scores.device
    num_classes = pred_scores.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)
    pred_embeddings = embeddings_.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_embeddings = []
    for boxes, scores, embeddings, image_shape in zip(pred_boxes, pred_scores, pred_embeddings, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]
        # embeddings are already person-specific

        # batch everything, by making every class prediction be a separate
        # instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()
        embeddings = embeddings.reshape(-1, self.embedding_head.dim)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels, embeddings = \
            boxes[inds], scores[inds], labels[inds], embeddings[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, embeddings = \
            boxes[keep], scores[keep], labels[keep], embeddings[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels, embeddings = \
            boxes[keep], scores[keep], labels[keep], embeddings[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_embeddings.append(embeddings)

    return all_boxes, all_scores, all_embeddings, all_labels
def post_processor(self, class_logits, box_regression, proposals, img_metas):
    num_classes = class_logits.shape[1]
    device = class_logits.device

    boxes_per_image = [box.shape[0] for box in proposals]
    proposals = cat([box for box in proposals])

    pred_boxes = self.box_coder.decode(box_regression.view(sum(boxes_per_image), -1), proposals)
    pred_boxes = pred_boxes.reshape(sum(boxes_per_image), -1, 4)
    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        pred_boxes = (pred_boxes,)
        pred_scores = (pred_scores,)
    else:
        pred_boxes = pred_boxes.split(boxes_per_image, dim=0)    # (N, #CLS, 4)
        pred_scores = pred_scores.split(boxes_per_image, dim=0)  # (N, #CLS)

    results = []
    for scores, boxes, img_meta in zip(pred_scores, pred_boxes, img_metas):
        width, height = img_meta['img_shape']
        boxes = box_ops.clip_boxes_to_image(boxes, (height, width))

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        result = {
            'boxes': boxes,
            'scores': scores,
            'labels': labels,
        }
        results.append(result)
    return results
def postprocess_detections(self, class_logits, attr_logits, box_regression, proposals, image_shapes):
    # type: (Tensor, Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
    # class_logits: (1000, 47), attr_logits: (1000, 295)
    device = class_logits.device
    num_classes = class_logits.shape[-1]
    num_attrs = attr_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)
    pred_ascores = F.sigmoid(attr_logits)
    # print(pred_ascores)
    # set_trace()

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO: remove this when ONNX supports dynamic split sizes
        # and just assign to pred_boxes instead of pred_boxes_list
        pred_boxes_list = [pred_boxes]
        pred_scores_list = [pred_scores]
        pred_ascores_list = [pred_ascores]
    else:
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)     # (bs, 1000, 47, 4)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)   # (bs, 1000, 47)
        pred_ascores_list = pred_ascores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_attrs = []
    for boxes, scores, ascores, image_shape in zip(pred_boxes_list, pred_scores_list, pred_ascores_list, image_shapes):
        # for each image
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)  # (1000, 47); each row runs from 0 to 46

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]
        ascores = ascores[:, 1:]  # (1000, 294)

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)  # (46000, 4)
        scores = scores.reshape(-1)   # (46000,)
        labels = labels.reshape(-1)   # (46000,)
        ascores = ascores.unsqueeze(0).repeat(46, 1, 1).reshape(-1, num_attrs - 1)  # (46, 1000, 294) -> (46000, 294)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)  # squeeze to 1-D, then gate by the threshold
        boxes, scores, labels, ascores = boxes[inds], scores[inds], labels[inds], ascores[inds]

        # remove empty boxes
        # note: each proposal predicts 46 class-specific boxes, so the threshold
        # above is applied per class-level box, not per proposal
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, ascores = boxes[keep], scores[keep], labels[keep], ascores[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels, ascores = boxes[keep], scores[keep], labels[keep], ascores[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

        # TODO: add attribute-wise thresholds
        # ascores: (post_rois, 294)
        # brute force
        ascore_list = []
        for ascore in ascores:
            ascore_list.append(torch.where(ascore > self.attr_score_thresh)[0])  # 1-D tensors of varying length
        all_attrs.append(ascore_list)  # list[(post_rois, 294)]

    # all_scores: list[(post_rois,)], all_labels: list[(post_rois,)],
    # all_attrs: list[list[attr1, attr2], ...]: each inner list holds the attribute index tensor for one RoI
    return all_boxes, all_scores, all_labels, all_attrs
def postprocess_detections(
    self,
    class_logits,    # type: Tensor
    box_regression,  # type: Tensor
    proposals,       # type: List[Tensor]
    attr_logits,     # type: Tensor
    image_shapes,    # type: List[Tuple[int, int]]
):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)
    pred_attr_scores = F.sigmoid(attr_logits)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)
    pred_attr_scores_list = pred_attr_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_attr_scores = []
    all_attr_labels = []
    for boxes, scores, attr_scores, image_shape in zip(pred_boxes_list, pred_scores_list, pred_attr_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # broadcast the per-proposal attribute scores across the foreground classes
        detection_per_image = attr_scores.shape[0]
        attr_classes = 341
        curr_attr_shape = (detection_per_image, attr_classes)
        new_attr_shape = (detection_per_image, num_classes - 1, attr_classes)
        attr_scores = attr_scores.unsqueeze(1)
        attr_scores = attr_scores.expand(*new_attr_shape)

        attr_labels = torch.arange(341, device=device)
        attr_labels = attr_labels.view(1, 1, -1).expand_as(attr_scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)
        attr_labels = attr_labels.reshape(-1, 341)
        attr_scores = attr_scores.reshape(-1, 341)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels, attr_scores, attr_labels = \
            boxes[inds], scores[inds], labels[inds], attr_scores[inds], attr_labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, attr_scores, attr_labels = \
            boxes[keep], scores[keep], labels[keep], attr_scores[keep], attr_labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels, attr_scores, attr_labels = \
            boxes[keep], scores[keep], labels[keep], attr_scores[keep], attr_labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_attr_scores.append(attr_scores)
        all_attr_labels.append(attr_labels)

    return all_boxes, all_scores, all_labels, all_attr_scores, all_attr_labels
def ssm_postprocess_detections(self, head_outputs, anchors, image_shapes):
    # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
    # TODO: Merge this with roi_heads.RoIHeads.postprocess_detections ?
    class_logits = head_outputs.pop('cls_logits')
    box_regression = head_outputs.pop('bbox_regression')
    other_outputs = head_outputs

    device = class_logits.device
    num_classes = class_logits.shape[-1]

    scores = torch.sigmoid(class_logits)

    # create labels for each score
    labels = torch.arange(num_classes, device=device)
    labels = labels.view(1, -1).expand_as(scores)

    detections = torch.jit.annotate(List[Dict[str, Tensor]], [])
    al_idx = 0
    all_boxes = torch.empty([0, 4]).cuda()
    all_scores = torch.tensor([]).cuda()
    all_labels = []
    CONF_THRESH = 0.5  # a larger value yields more active-learning samples

    for index, (box_regression_per_image, scores_per_image, labels_per_image, anchors_per_image, image_shape) in \
            enumerate(zip(box_regression, scores, labels, anchors, image_shapes)):

        if torch.max(scores_per_image) < CONF_THRESH:
            # print(scores)
            al_idx = 1
            detections.append({
                "boxes": all_boxes,
                "labels": all_labels,
                "scores": all_scores,
                'al': al_idx,
            })
            continue

        boxes_per_image = self.box_coder.decode_single(box_regression_per_image, anchors_per_image)
        boxes_per_image = box_ops.clip_boxes_to_image(boxes_per_image, image_shape)

        other_outputs_per_image = [(k, v[index]) for k, v in other_outputs.items()]

        image_boxes = []
        image_scores = []
        image_labels = []
        image_other_outputs = torch.jit.annotate(Dict[str, List[Tensor]], {})

        for class_index in range(num_classes):
            # remove low scoring boxes
            inds = torch.gt(scores_per_image[:, class_index], self.score_thresh)
            boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                boxes_per_image[inds], scores_per_image[inds, class_index], scores_per_image[inds], \
                labels_per_image[inds, class_index]
            other_outputs_per_class = [(k, v[inds]) for k, v in other_outputs_per_image]

            # randomly subsample at most 500 candidates per class
            keep = [i for i in range(len(boxes_per_class))]
            random.shuffle(keep)
            keep = keep[:500]
            boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                boxes_per_class[keep], scores_per_class[keep], scores_all_class[keep], labels_per_class[keep]
            other_outputs_per_class = [(k, v[keep]) for k, v in other_outputs_per_class]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes_per_class, min_size=1e-2)
            boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                boxes_per_class[keep], scores_per_class[keep], scores_all_class[keep], labels_per_class[keep]
            other_outputs_per_class = [(k, v[keep]) for k, v in other_outputs_per_class]

            # non-maximum suppression, independently done per class
            keep = box_ops.nms(boxes_per_class, scores_per_class, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes_per_class, scores_per_class, scores_all_class, labels_per_class = \
                boxes_per_class[keep], scores_per_class[keep], scores_all_class[keep], labels_per_class[keep]
            other_outputs_per_class = [(k, v[keep]) for k, v in other_outputs_per_class]

            image_boxes.append(boxes_per_class)
            image_scores.append(scores_per_class)
            image_labels.append(labels_per_class)

            for k, v in other_outputs_per_class:
                if k not in image_other_outputs:
                    image_other_outputs[k] = []
                image_other_outputs[k].append(v)

            for i in range(len(boxes_per_class)):
                all_boxes = torch.cat((all_boxes, boxes_per_class[i].unsqueeze(0)), 0)
                all_scores = torch.cat((all_scores, scores_per_class[i].unsqueeze(0)), 0)
                all_labels.append(judge_y(scores_all_class[i][1:]))

        detections.append({
            "boxes": all_boxes,
            "labels": all_labels,
            "scores": all_scores,
            'al': al_idx,
        })
        for k, v in image_other_outputs.items():
            detections[-1].update({k: torch.cat(v, dim=0)})

    return detections
def postprocess_detections(
    self,
    class_logits,    # type: Tensor
    box_regression,  # type: Tensor
    proposals,       # type: List[Tensor]
    image_shapes,    # type: List[Tuple[int, int]]
):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > cfg.BOX.SCORE_THRESH).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, cfg.BOX.NMS_THRESH)
        # keep only topk scoring predictions
        keep = keep[:cfg.BOX.DETECTIONS_PER_IMG]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def generate_proposals(self, anchors, objectness, box_regression, img_metas, is_target_domain=False):
    """
    Args:
        anchors:
        objectness: (N, A, H, W)
        box_regression: (N, A * 4, H, W)
        img_metas:
        is_target_domain:
    Returns:
    """
    pre_nms_top_n = self.pre_nms_top_n[self.training]
    post_nms_top_n = self.post_nms_top_n[self.training]
    if is_target_domain:
        post_nms_top_n = self.cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
    nms_thresh = self.nms_thresh

    device = objectness.device
    N, A, H, W = objectness.shape

    objectness = objectness.permute(0, 2, 3, 1).reshape(N, H * W * A)
    objectness = objectness.sigmoid()

    box_regression = box_regression.permute(0, 2, 3, 1).reshape(N, H * W * A, 4)

    concat_anchors = cat(anchors, dim=0)
    concat_anchors = concat_anchors.reshape(N, A * H * W, 4)

    num_anchors = A * H * W
    pre_nms_top_n = min(pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]
    concat_anchors = concat_anchors[batch_idx, topk_idx]

    proposals = self.box_coder.decode(box_regression.view(-1, 4), concat_anchors.view(-1, 4))
    proposals = proposals.view(N, -1, 4)

    results = []
    for proposal, score, img_meta in zip(proposals, objectness, img_metas):
        img_width, img_height = img_meta['img_shape']
        proposal = box_ops.clip_boxes_to_image(proposal, (img_height, img_width))

        keep = box_ops.remove_small_boxes(proposal, self.min_size)
        proposal = proposal[keep]
        score = score[keep]

        keep = ops.nms(proposal, score, nms_thresh)
        keep = keep[:post_nms_top_n]
        proposal = proposal[keep]
        score = score[keep]
        results.append(proposal)  # (N, 4)
    return results
def postprocess_detections(self, class_logits, box_regression, anchors, image_shapes):
    # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in anchors]
    pred_boxes = self.box_coder.decode(box_regression, anchors)

    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO: remove this when ONNX supports dynamic split sizes
        # and just assign to pred_boxes instead of pred_boxes_list
        pred_boxes_list = [pred_boxes]
        pred_scores_list = [pred_scores]
    else:
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        row_to, column = 1, -1
        labels = labels.view(row_to, column).expand_as(scores)

        # take the top-1 class and its score for each prediction
        along_class_prediction = 1
        pred_class_labels = torch.argmax(scores, dim=along_class_prediction)
        prediction_num = torch.arange(boxes.shape[0], device=device)
        flat_pred_class_prob_idxs = prediction_num * num_classes + pred_class_labels
        top1_scores = torch.take(scores, flat_pred_class_prob_idxs)
        labels = pred_class_labels

        # remove predictions with the background label
        # boxes = boxes[:, 1:]
        # scores = scores[:, 1:]
        # labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        # boxes = boxes.reshape(-1, 4)
        # scores = scores.reshape(-1)
        # labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(top1_scores > self.score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds].squeeze(1), top1_scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
    return all_boxes, all_scores, all_labels
def postprocess_detections(self, head_outputs, anchors, image_shapes):
    # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
    # TODO: Merge this with roi_heads.RoIHeads.postprocess_detections ?
    class_logits = head_outputs.pop('cls_logits')
    box_regression = head_outputs.pop('bbox_regression')
    other_outputs = head_outputs

    device = class_logits.device
    num_classes = class_logits.shape[-1]

    scores = torch.sigmoid(class_logits)

    # create labels for each score
    labels = torch.arange(num_classes, device=device)
    labels = labels.view(1, -1).expand_as(scores)

    detections = torch.jit.annotate(List[Dict[str, Tensor]], [])

    for index, (box_regression_per_image, scores_per_image, labels_per_image, anchors_per_image, image_shape) in \
            enumerate(zip(box_regression, scores, labels, anchors, image_shapes)):
        boxes_per_image = self.box_coder.decode_single(box_regression_per_image, anchors_per_image)
        boxes_per_image = box_ops.clip_boxes_to_image(boxes_per_image, image_shape)

        other_outputs_per_image = [(k, v[index]) for k, v in other_outputs.items()]

        image_boxes = []
        image_scores = []
        image_labels = []
        image_other_outputs = torch.jit.annotate(Dict[str, List[Tensor]], {})

        for class_index in range(num_classes):
            # remove low scoring boxes
            inds = torch.gt(scores_per_image[:, class_index], self.score_thresh)
            boxes_per_class, scores_per_class, labels_per_class = \
                boxes_per_image[inds], scores_per_image[inds, class_index], labels_per_image[inds, class_index]
            other_outputs_per_class = [(k, v[inds]) for k, v in other_outputs_per_image]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes_per_class, min_size=1e-2)
            boxes_per_class, scores_per_class, labels_per_class = \
                boxes_per_class[keep], scores_per_class[keep], labels_per_class[keep]
            other_outputs_per_class = [(k, v[keep]) for k, v in other_outputs_per_class]

            # non-maximum suppression, independently done per class
            keep = box_ops.nms(boxes_per_class, scores_per_class, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes_per_class, scores_per_class, labels_per_class = \
                boxes_per_class[keep], scores_per_class[keep], labels_per_class[keep]
            other_outputs_per_class = [(k, v[keep]) for k, v in other_outputs_per_class]

            image_boxes.append(boxes_per_class)
            image_scores.append(scores_per_class)
            image_labels.append(labels_per_class)

            for k, v in other_outputs_per_class:
                if k not in image_other_outputs:
                    image_other_outputs[k] = []
                image_other_outputs[k].append(v)

        detections.append({
            'boxes': torch.cat(image_boxes, dim=0),
            'scores': torch.cat(image_scores, dim=0),
            'labels': torch.cat(image_labels, dim=0),
        })
        for k, v in image_other_outputs.items():
            detections[-1].update({k: torch.cat(v, dim=0)})

    return detections
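The one-stage variants here loop over classes and call ops.nms per class, while the RoI-heads variants pass labels to batched_nms, which offsets boxes by class internally so boxes of different classes never suppress each other. A small sketch showing the two approaches produce the same survivors; all values are illustrative:

import torch
from torchvision.ops import boxes as box_ops

boxes = torch.tensor([[0., 0., 10., 10.], [0., 0., 10., 10.], [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
labels = torch.tensor([0, 0, 1])

# per-class loop, as in the retinanet-style snippets
kept = []
for c in labels.unique():
    idx = torch.where(labels == c)[0]
    keep = box_ops.nms(boxes[idx], scores[idx], iou_threshold=0.5)
    kept.append(idx[keep])
print(torch.cat(kept))  # tensor([0, 2]): the class-1 box survives despite full overlap

# single call, same survivors (sorted by decreasing score)
print(box_ops.batched_nms(boxes, scores, labels, 0.5))  # tensor([0, 2])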
def postprocess_detections(
    self,
    class_logits,    # type: Tensor
    dof_regression,  # type: Tensor
    proposals,       # type: List[Tensor]
    image_shapes,    # type: List[Tuple[int, int]]
):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = torch.cat(proposals, dim=0)
    N = dof_regression.shape[0]
    pred_boxes = pred_boxes.reshape(N, -1, 4)
    pred_dofs = dof_regression.reshape(N, -1, 6)

    pred_scores = F.softmax(class_logits, -1)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)
    pred_dofs_list = pred_dofs.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_dofs = []
    for boxes, dofs, scores, image_shape in zip(pred_boxes_list, pred_dofs_list, pred_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        dofs = dofs[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        dofs = dofs.reshape(-1, 6)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, dofs, scores, labels = (
            boxes[inds],
            dofs[inds],
            scores[inds],
            labels[inds],
        )

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, dofs, scores, labels = (
            boxes[keep],
            dofs[keep],
            scores[keep],
            labels[keep],
        )

        # create boxes from the predicted poses
        boxes, dofs = transform_pose_global_project_bbox(
            boxes,
            dofs,
            self.pose_mean,
            self.pose_stddev,
            image_shape,
            self.threed_68_points,
            bbox_x_factor=self.bbox_x_factor,
            bbox_y_factor=self.bbox_y_factor,
            expand_forehead=self.expand_forehead,
        )

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions (slice before indexing so the
        # detections_per_img cap actually takes effect)
        keep = keep[:self.detections_per_img]
        boxes, dofs, scores, labels = (
            boxes[keep],
            dofs[keep],
            scores[keep],
            labels[keep],
        )

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_dofs.append(dofs)

    return all_boxes, all_dofs, all_scores, all_labels
def process_detections(
    self,
    outputs: Dict[str, Tensor],
    anchors: List[Tensor],
    im_szs: List[Tuple[int, int]],
) -> List[Dict[str, Tensor]]:
    "Process `outputs` and return the predicted bboxes, scores, and class labels above `detect_thres`"
    class_logits = outputs.pop("cls_preds")
    bboxes = outputs.pop("bbox_preds")

    scores = torch.sigmoid(class_logits)

    device = class_logits.device
    num_classes = class_logits.shape[-1]

    # create labels for each score
    labels = torch.arange(num_classes, device=device)
    labels = labels.view(1, -1).expand_as(scores)

    detections = torch.jit.annotate(List[Dict[str, Tensor]], [])

    for bb_per_im, sc_per_im, ancs_per_im, im_sz, lbl_per_im in zip(bboxes, scores, anchors, im_szs, labels):
        all_boxes = []
        all_scores = []
        all_labels = []

        # convert the activations, i.e., outputs of the model, to bounding boxes
        bb_per_im = activ_2_bbox(bb_per_im, ancs_per_im)
        # clip the bounding boxes to the image size
        bb_per_im = ops.clip_boxes_to_image(bb_per_im, im_sz)

        # iterate over each `cls_idx` in `num_classes` and apply nms
        # to each class individually
        for cls_idx in range(num_classes):
            # remove predictions with scores < score_thres
            # and grab the predictions corresponding to cls_idx
            inds = torch.gt(sc_per_im[:, cls_idx], self.score_thres)
            bb_per_cls, sc_per_cls, lbl_per_cls = (
                bb_per_im[inds],
                sc_per_im[inds, cls_idx],
                lbl_per_im[inds, cls_idx],
            )

            # remove boxes that are too small (~1e-2)
            keep = ops.remove_small_boxes(bb_per_cls, min_size=1e-2)
            bb_per_cls, sc_per_cls, lbl_per_cls = (
                bb_per_cls[keep],
                sc_per_cls[keep],
                lbl_per_cls[keep],
            )

            # apply non-maximum suppression to suppress overlapping boxes
            keep = ops.nms(bb_per_cls, sc_per_cls, self.nms_thres)
            bb_per_cls, sc_per_cls, lbl_per_cls = (
                bb_per_cls[keep],
                sc_per_cls[keep],
                lbl_per_cls[keep],
            )

            all_boxes.append(bb_per_cls)
            all_scores.append(sc_per_cls)
            all_labels.append(lbl_per_cls)

        # convert to tensors
        all_boxes = torch.cat(all_boxes, dim=0)
        all_scores = torch.cat(all_scores, dim=0)
        all_labels = torch.cat(all_labels, dim=0)

        # the model predicts classes in the range [0, num_classes);
        # 0 is reserved for the background class, for which no loss is calculated,
        # so add 1 to all class predictions to shift the range from
        # [0, num_classes) -> [1, num_classes]
        all_labels = all_labels + 1

        # sort by scores and grab the idxs corresponding to the topk predictions
        _, topk_idxs = all_scores.sort(descending=True)
        topk_idxs = topk_idxs[:self.detections_per_img]
        all_boxes, all_scores, all_labels = (
            all_boxes[topk_idxs],
            all_scores[topk_idxs],
            all_labels[topk_idxs],
        )

        detections.append({"boxes": all_boxes, "scores": all_scores, "labels": all_labels})

    return detections
def post_process(self, cls_logits: torch.Tensor, reg_deltas: torch.Tensor, batched_rois: List[torch.Tensor]):
    nms_threshold = self._params['nms_threshold']
    conf_threshold = self._params['conf_threshold']
    keep_top_n = self._params['keep_top_n']

    batched_dets: List[torch.Tensor] = []
    current = 0
    for rois in batched_rois:
        N = rois.size(0)
        if N == 0:
            print("warning! found empty rois")
            batched_dets.append(torch.empty(0, 6, dtype=reg_deltas.dtype, device=reg_deltas.device))
            continue

        logits = cls_logits[current:current + N]
        offsets = reg_deltas[current:current + N]
        current += N
        # logits: torch.Tensor(N,)
        # offsets: torch.Tensor(N, 4)
        # rois: torch.Tensor(N, 4)

        scores = torch.sigmoid(logits)
        preds = torch.zeros(scores.shape, dtype=torch.int64, device=scores.device)
        preds[scores >= 0.5] = 1
        fg_preds_mask = preds != 0

        # convert offsets to boxes
        # N,4 | N,4 => N,4 as xmin, ymin, xmax, ymax
        boxes = offsets2boxes(offsets.unsqueeze(0), rois).squeeze(0)

        # keep only foreground predictions
        boxes = boxes[fg_preds_mask]
        preds = preds[fg_preds_mask]
        scores = scores[fg_preds_mask]

        # apply conf threshold
        keep = scores >= conf_threshold
        scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

        # remove small boxes
        keep = box_ops.remove_small_boxes(boxes, 1e-3)  # TODO try 1
        scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

        # batched nms
        keep = box_ops.batched_nms(boxes, scores, preds, nms_threshold)
        scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

        # select top n
        keep_n = min(keep_top_n, scores.size(0))
        _, selected_ids = scores.topk(keep_n)
        scores, preds, boxes = scores[selected_ids], preds[selected_ids], boxes[selected_ids]

        scores.unsqueeze_(1)
        preds = preds.unsqueeze(1).to(boxes.dtype)
        dets = torch.cat([boxes, scores, preds], dim=-1)
        batched_dets.append(dets)
    return batched_dets
def _postprocess_detections(
    self,
    class_logits: Tensor,
    box_features: Tensor,
    box_regression: Tensor,
    proposals: List[Tensor],
    image_shapes: List[Tuple[int, int]],
) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]:
    """
    Adapted from
    https://github.com/pytorch/vision/blob/4521f6d152875974e317fa247a633e9ad1ea05c8/torchvision/models/detection/roi_heads.py#L664.

    The only reason we have to re-implement this method is so we can pull out
    the box features that we want.
    """
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    # shape: (batch_size * boxes_per_image, num_classes, 4)
    pred_boxes = self.detector.roi_heads.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    features_list = box_features.split(boxes_per_image, dim=0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_features = []
    all_scores = []
    all_labels = []
    for boxes, features, scores, image_shape in zip(pred_boxes_list, features_list, pred_scores_list, image_shapes):
        # shape: (boxes_per_image, num_classes, 4)
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # shape: (boxes_per_image, num_classes, feature_size)
        features = features.unsqueeze(1).expand(boxes.shape[0], boxes.shape[1], -1)

        # create labels for each prediction
        # shape: (num_classes,)
        labels = torch.arange(num_classes, device=device)
        # shape: (boxes_per_image, num_classes)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        # shape: (boxes_per_image, num_classes - 1, 4)
        boxes = boxes[:, 1:]
        # shape: (boxes_per_image, num_classes - 1, feature_size)
        features = features[:, 1:]
        # shape: (boxes_per_image, num_classes - 1)
        scores = scores[:, 1:]
        # shape: (boxes_per_image, num_classes - 1)
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        # shape: (boxes_per_image * (num_classes - 1), 4)
        boxes = boxes.reshape(-1, 4)
        # shape: (boxes_per_image * (num_classes - 1), feature_size)
        features = features.reshape(boxes.shape[0], -1)
        # shape: (boxes_per_image * (num_classes - 1),)
        scores = scores.reshape(-1)
        # shape: (boxes_per_image * (num_classes - 1),)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.where(scores > self.detector.roi_heads.score_thresh)[0]
        boxes, features, scores, labels = (
            boxes[inds],
            features[inds],
            scores[inds],
            labels[inds],
        )

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, features, scores, labels = (
            boxes[keep],
            features[keep],
            scores[keep],
            labels[keep],
        )

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.detector.roi_heads.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detector.roi_heads.detections_per_img]
        boxes, features, scores, labels = (
            boxes[keep],
            features[keep],
            scores[keep],
            labels[keep],
        )

        all_boxes.append(boxes)
        all_features.append(features)
        all_scores.append(scores)
        all_labels.append(labels)

    return all_boxes, all_features, all_scores, all_labels