def __evaluate_image_preds_no_gt(
    self, det: Tensor, idx: int, det_label_mask: Tensor, max_det: int, area_range: Tuple[int, int], nb_iou_thrs: int
) -> Dict[str, Any]:
    """Some predictions but no GT."""
    # GTs
    nb_gt = 0
    gt_ignore = torch.zeros(nb_gt, dtype=torch.bool, device=self.device)

    # Detections
    det = det[det_label_mask]
    scores = self.detection_scores[idx]
    scores_filtered = scores[det_label_mask]
    scores_sorted, dtind = torch.sort(scores_filtered, descending=True)

    det = det[dtind]
    if len(det) > max_det:
        det = det[:max_det]

    nb_det = len(det)
    det_areas = box_area(det).to(self.device)
    det_ignore_area = (det_areas < area_range[0]) | (det_areas > area_range[1])
    ar = det_ignore_area.reshape((1, nb_det))
    det_ignore = torch.repeat_interleave(ar, nb_iou_thrs, 0)

    return {
        "dtMatches": torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool, device=self.device),
        "gtMatches": torch.zeros((nb_iou_thrs, nb_gt), dtype=torch.bool, device=self.device),
        "dtScores": scores_sorted,
        "gtIgnore": gt_ignore,
        "dtIgnore": det_ignore,
    }
def __evaluate_image_gt_no_preds(
    self, gt: Tensor, gt_label_mask: Tensor, area_range: Tuple[int, int], nb_iou_thrs: int
) -> Dict[str, Any]:
    """Some GT but no predictions."""
    # GTs
    gt = gt[gt_label_mask]
    nb_gt = len(gt)
    areas = box_area(gt)
    ignore_area = (areas < area_range[0]) | (areas > area_range[1])
    gt_ignore, _ = torch.sort(ignore_area.to(torch.uint8))
    gt_ignore = gt_ignore.to(torch.bool)

    # Detections
    nb_det = 0
    det_ignore = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool, device=self.device)

    return {
        "dtMatches": torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool, device=self.device),
        "gtMatches": torch.zeros((nb_iou_thrs, nb_gt), dtype=torch.bool, device=self.device),
        # scores are floats, not matching flags, so the empty tensor should be float
        "dtScores": torch.zeros(nb_det, dtype=torch.float32, device=self.device),
        "gtIgnore": gt_ignore,
        "dtIgnore": det_ignore,
    }
def test_box_area(self):
    # A bounding box of area 10000 and a degenerate case
    box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float)
    expected = torch.tensor([10000, 0])
    calc_area = ops.box_area(box_tensor)
    assert calc_area.size() == torch.Size([2])
    assert calc_area.dtype == box_tensor.dtype
    assert torch.all(torch.eq(calc_area, expected)).item() is True
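# For reference, box_area on xyxy boxes reduces to (x2 - x1) * (y2 - y1); a
# quick sketch verifying that equivalence against the operator (box values
# here are arbitrary, chosen for illustration).
import torch
from torchvision import ops

boxes = torch.tensor([[0., 0., 100., 100.], [10., 20., 30., 60.]])
manual = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
assert torch.equal(ops.box_area(boxes), manual)  # both give [10000., 800.]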
def stats_dataset(dataset: ObjectDetectionDataSet,
                  rcnn_transform: Optional[GeneralizedRCNNTransform] = None):
    """
    Iterates over the dataset and returns some stats.
    Can be useful to pick the right anchor box sizes.
    """
    from torchvision.ops import box_convert, box_area

    stats = {
        'image_height': [],
        'image_width': [],
        'image_mean': [],
        'image_std': [],
        'boxes_height': [],
        'boxes_width': [],
        'boxes_num': [],
        'boxes_area': []
    }
    for batch in dataset:
        # Batch
        x, y, x_name, y_name = batch['x'], batch['y'], batch['x_name'], batch['y_name']

        # Transform
        if rcnn_transform is not None:
            x, y = rcnn_transform([x], [y])
            x, y = x.tensors, y[0]

        # Image
        stats['image_height'].append(x.shape[-2])
        stats['image_width'].append(x.shape[-1])
        stats['image_mean'].append(x.mean().item())
        stats['image_std'].append(x.std().item())

        # Target: the last two columns of xywh boxes are width, then height
        wh = box_convert(y['boxes'], 'xyxy', 'xywh')[:, -2:]
        stats['boxes_height'].append(wh[:, -1])
        stats['boxes_width'].append(wh[:, -2])
        stats['boxes_num'].append(len(wh))
        stats['boxes_area'].append(box_area(y['boxes']))

    stats['image_height'] = torch.tensor(stats['image_height'], dtype=torch.float)
    stats['image_width'] = torch.tensor(stats['image_width'], dtype=torch.float)
    stats['image_mean'] = torch.tensor(stats['image_mean'], dtype=torch.float)
    stats['image_std'] = torch.tensor(stats['image_std'], dtype=torch.float)
    stats['boxes_height'] = torch.cat(stats['boxes_height'])
    stats['boxes_width'] = torch.cat(stats['boxes_width'])
    stats['boxes_area'] = torch.cat(stats['boxes_area'])
    stats['boxes_num'] = torch.tensor(stats['boxes_num'], dtype=torch.float)

    return stats
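# A hypothetical usage sketch (not from the repo): `dataset` is assumed to be
# an ObjectDetectionDataSet instance. Quantiles of the box side lengths can
# guide anchor sizes, since anchors are parameterized by a base size.
stats = stats_dataset(dataset)
side_lengths = stats['boxes_area'].sqrt()
anchor_sizes = torch.quantile(side_lengths, torch.tensor([0.25, 0.5, 0.75]))
print(f"suggested anchor sizes: {anchor_sizes.tolist()}")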
def _encode_targets(self, cls_labels, bbox_labels, instance_mask_labels):
    points = self.points.clone()
    regress_ranges = self.regress_ranges.clone()

    num_points = points.size(0)
    num_gts = cls_labels.size(0)

    regress_ranges = regress_ranges[:, None, :].repeat(1, num_gts, 1)  # [num_points, num_gts, 2]
    bbox_areas = cv_ops.box_area(bbox_labels)[None].repeat(num_points, 1)  # [num_points, num_gts]

    expanded_points = points[:, None, :].repeat(1, num_gts, 1)
    expanded_bboxes = bbox_labels[None, :, :].repeat(num_points, 1, 1)
    distance_targets = bbox_ops.convert_bbox_to_distance(
        expanded_points, expanded_bboxes)  # [num_points, num_gts, 4]
    # instance_mask_labels = instance_mask_labels[None, :, :, :].repeat(
    #     num_points, 1, 1, 1)  # [num_points, num_gts, roi_size, roi_size]

    # Condition 1: inside a gt bbox
    inside_gt_bbox_mask = distance_targets.min(dim=-1)[0] > 0  # [num_points, num_gts]

    # Condition 2: limit the regression range for each location
    max_regress_distance = distance_targets.max(dim=-1)[0]  # [num_points, num_gts]
    inside_regress_range = (
        (max_regress_distance >= regress_ranges[..., 0])
        & (max_regress_distance <= regress_ranges[..., 1])
    )  # [num_points, num_gts]

    # If there is still more than one instance for a location, choose the one with minimal area
    bbox_areas[inside_gt_bbox_mask == 0] = tools.INF
    bbox_areas[inside_regress_range == 0] = tools.INF
    min_area, min_area_idx = bbox_areas.min(dim=1)  # [num_points], assign a gt to each location

    class_targets = cls_labels[min_area_idx]
    class_targets[min_area == tools.INF] = 0
    distance_targets = distance_targets[range(num_points), min_area_idx, :]
    # instance_mask_labels = instance_mask_labels[range(num_points), min_area_idx, :, :]

    return class_targets, distance_targets  # , instance_mask_labels
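# A minimal sketch of what `bbox_ops.convert_bbox_to_distance` is assumed to
# compute above: FCOS-style (left, top, right, bottom) offsets from a point to
# the box sides. All four are positive exactly when the point lies inside the
# box, which is why `distance_targets.min(dim=-1)[0] > 0` tests membership.
import torch

def convert_bbox_to_distance(points, bboxes):
    # points: [..., 2] as (x, y); bboxes: [..., 4] as (x1, y1, x2, y2)
    left = points[..., 0] - bboxes[..., 0]
    top = points[..., 1] - bboxes[..., 1]
    right = bboxes[..., 2] - points[..., 0]
    bottom = bboxes[..., 3] - points[..., 1]
    return torch.stack((left, top, right, bottom), dim=-1)  # [..., 4]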
def area_check(box, expected, tolerance=1e-4):
    out = ops.box_area(box)
    assert out.size() == expected.size()
    assert ((out - expected).abs().max() < tolerance).item()
def area_check(box, expected, tolerance=1e-4):
    out = ops.box_area(box)
    torch.testing.assert_close(out, expected, rtol=0.0, atol=tolerance, check_dtype=False)
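# Example invocation (values are illustrative): both area_check variants take
# a float box tensor in xyxy format and the expected per-box areas.
box = torch.tensor([[0.0, 0.0, 100.0, 100.0], [10.0, 15.0, 30.0, 35.0]], dtype=torch.float)
expected = torch.tensor([10000.0, 400.0])
area_check(box, expected)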
import torch
from torchvision import io, models, ops, transforms

# Read and write images: the torchvision.io package
tensor = io.read_image("../../data/image/1.jpg")
print("tensor shape:", tensor.shape)
io.write_png(tensor, "../../data/image/result.png")

tensor = io.read_image("../../data/image/lena.png")
print("tensor shape:", tensor.shape)
io.write_jpeg(tensor, "../../data/image/result.jpg")

# Download a pre-trained AlexNet model: the torchvision.models package
net = models.alexnet(pretrained=True)

# Computer vision operators: the torchvision.ops package
boxes = torch.tensor([[1, 1, 101, 101], [3, 5, 13, 15], [2, 4, 22, 44]])
area = ops.box_area(boxes)
print(f"area: {area}")

index = ops.remove_small_boxes(boxes, min_size=20)
print(f"index: {index}")

# Image transforms: the torchvision.transforms package
resize = transforms.Resize(size=[256, 128])
img = resize.forward(tensor)
io.write_jpeg(img, "../../data/image/resize.jpg")

grayscale = transforms.Grayscale()
img2 = grayscale.forward(img)
io.write_jpeg(img2, "../../data/image/gray.jpg")

affine = transforms.RandomAffine(degrees=35)
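# remove_small_boxes returns the indices of the boxes whose width and height
# are both at least min_size, so the surviving boxes can be gathered by
# plain indexing:
kept_boxes = boxes[index]
print(f"kept boxes: {kept_boxes}")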
def _evaluate_image(
    self, idx: int, class_id: int, area_range: Tuple[int, int], max_det: int, ious: dict
) -> Optional[dict]:
    """Perform evaluation for single class and image.

    Args:
        idx: Image Id, equivalent to the index of supplied samples.
        class_id: Class Id of the supplied ground truth and detection labels.
        area_range: List of lower and upper bounding box area threshold.
        max_det: Maximum number of evaluated detection bounding boxes.
        ious: IoU results for image and class.
    """
    gt = self.groundtruth_boxes[idx]
    det = self.detection_boxes[idx]
    gt_label_mask = self.groundtruth_labels[idx] == class_id
    det_label_mask = self.detection_labels[idx] == class_id

    # No GT and no predictions --> ignore image
    if len(gt_label_mask) == 0 and len(det_label_mask) == 0:
        return None

    nb_iou_thrs = len(self.iou_thresholds)

    # Some GT but no predictions
    if len(gt_label_mask) > 0 and len(det_label_mask) == 0:
        return self.__evaluate_image_gt_no_preds(gt, gt_label_mask, area_range, nb_iou_thrs)

    # Some predictions but no GT
    if len(gt_label_mask) == 0 and len(det_label_mask) > 0:
        return self.__evaluate_image_preds_no_gt(det, idx, det_label_mask, max_det, area_range, nb_iou_thrs)

    gt = gt[gt_label_mask]
    det = det[det_label_mask]
    if gt.numel() == 0 and det.numel() == 0:
        return None

    areas = box_area(gt)
    ignore_area = (areas < area_range[0]) | (areas > area_range[1])

    # Sort detections highest score first; sort GT so ignored boxes come last.
    # Convert to uint8 temporarily and back to bool, because
    # "Sort currently does not support bool dtype on CUDA".
    ignore_area_sorted, gtind = torch.sort(ignore_area.to(torch.uint8))
    ignore_area_sorted = ignore_area_sorted.to(torch.bool)

    gt = gt[gtind]
    scores = self.detection_scores[idx]
    scores_filtered = scores[det_label_mask]
    scores_sorted, dtind = torch.sort(scores_filtered, descending=True)
    det = det[dtind]
    if len(det) > max_det:
        det = det[:max_det]

    # load computed ious
    ious = ious[idx, class_id][:, gtind] if len(ious[idx, class_id]) > 0 else ious[idx, class_id]

    nb_gt = len(gt)
    nb_det = len(det)
    gt_matches = torch.zeros((nb_iou_thrs, nb_gt), dtype=torch.bool)
    det_matches = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool)
    gt_ignore = ignore_area_sorted
    det_ignore = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool)

    if torch.numel(ious) > 0:
        for idx_iou, t in enumerate(self.iou_thresholds):
            for idx_det, _ in enumerate(det):
                m = MeanAveragePrecision._find_best_gt_match(t, gt_matches, idx_iou, gt_ignore, ious, idx_det)
                if m == -1:
                    continue
                det_ignore[idx_iou, idx_det] = gt_ignore[m]
                det_matches[idx_iou, idx_det] = 1
                gt_matches[idx_iou, m] = 1

    # Set unmatched detections outside of the area range to ignore
    det_areas = box_area(det)
    det_ignore_area = (det_areas < area_range[0]) | (det_areas > area_range[1])
    ar = det_ignore_area.reshape((1, nb_det))
    det_ignore = torch.logical_or(
        det_ignore, torch.logical_and(det_matches == 0, torch.repeat_interleave(ar, nb_iou_thrs, 0))
    )

    return {
        "dtMatches": det_matches,
        "gtMatches": gt_matches,
        "dtScores": scores_sorted,
        "gtIgnore": gt_ignore,
        "dtIgnore": det_ignore,
    }
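# A simplified sketch of what `MeanAveragePrecision._find_best_gt_match` is
# assumed to do (the actual helper is defined elsewhere on the class): among
# GTs not yet matched at this IoU threshold, excluding ignored GTs, pick the
# one with the highest IoU against the detection; return -1 if that IoU does
# not clear the threshold.
def _find_best_gt_match(thr, gt_matches, idx_iou, gt_ignore, ious, idx_det) -> int:
    remove_mask = gt_matches[idx_iou] | gt_ignore  # already matched or ignored
    gt_ious = ious[idx_det] * ~remove_mask         # zero out excluded GTs
    match_idx = gt_ious.argmax().item()
    return match_idx if gt_ious[match_idx] > thr else -1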