def preprocess_image(self, batched_inputs):
    """
    Normalize, pad and batch the input images.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = ImageList.from_tensors(images)
    return images
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

                image: Tensor, image in (C, H, W) format.
                sem_seg: semantic segmentation ground truth

            Other information that's included in the original dicts, such as:
            "height", "width" (int): the output resolution of the model,
            used in inference. See :meth:`postprocess` for details.

    Returns:
        list[dict]: Each dict is the output for one input image. The dict
            contains one key "sem_seg" whose value is a Tensor of the output
            resolution that represents the per-pixel segmentation prediction.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    features = self.backbone(images.tensor)

    if "sem_seg" in batched_inputs[0]:
        targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
        targets = ImageList.from_tensors(
            targets, self.backbone.size_divisibility, self.sem_seg_head.ignore_value
        ).tensor
    else:
        targets = None
    results, losses = self.sem_seg_head(features, targets)

    if self.training:
        return losses

    processed_results = []
    for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes):
        height = input_per_image.get("height")
        width = input_per_image.get("width")
        r = sem_seg_postprocess(result, image_size, height, width)
        processed_results.append({"sem_seg": r})
    return processed_results
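# Illustrative sketch (not from the original code): the batched_inputs format the
# docstring above describes, one dict per image. The `model` variable is hypothetical.
import torch

batched_inputs = [
    {
        "image": torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8),  # (C, H, W)
        "sem_seg": torch.zeros(480, 640, dtype=torch.long),  # per-pixel GT labels
        "height": 480,  # desired output resolution, used in inference
        "width": 640,
    }
]
# outputs = model(batched_inputs)  # -> [{"sem_seg": Tensor of shape (num_classes, 480, 640)}]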
def preprocess_flow(self, batched_inputs):
    """
    Normalize, pad and batch the target flow.
    """
    flows = [x["flow_map"].to(self.device) for x in batched_inputs]
    flows = [x / self.flow_div for x in flows]
    flows = ImageList.from_tensors(flows).tensor
    return flows
def preprocess_image(self, batched_inputs):
    """
    Normalize, pad and batch the input images.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [(x - self.pixel_mean) / self.pixel_std for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    return images
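# Illustrative sketch (assumption, not part of the snippet above): what
# ImageList.from_tensors does with a size_divisibility, as relied on by the
# preprocess_image helpers in this section.
import torch
from detectron2.structures import ImageList

imgs = [torch.rand(3, 480, 640), torch.rand(3, 500, 750)]
batch = ImageList.from_tensors(imgs, size_divisibility=32)
# Both images are padded to the largest H and W, rounded up to a multiple of 32,
# so batch.tensor has shape (2, 3, 512, 768); batch.image_sizes keeps the
# original (H, W) of each image for later un-padding.
print(batch.tensor.shape, batch.image_sizes)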
def preprocess_batchedimages(self, batched_inputs):
    """
    Preprocess batch: normalize, pad -> uniform batch.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    return images
def preprocess_image(self, inputs):
    data, im_info = inputs
    data = alias(data, "data")
    mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std
    images = (data - mean) / std
    images = ImageList(tensor=images, image_sizes=im_info)
    return images
def preprocess_image(self, batched_inputs):
    """Normalize, pad and batch the input images."""
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    return images
def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]):
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [(x - self.pixel_mean) / self.pixel_std for x in images]
    images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor

    metas = []
    rescale = {"height" in x for x in batched_inputs}
    if len(rescale) != 1:
        raise ValueError("Some inputs have original height/width, but some don't!")
    rescale = list(rescale)[0]
    output_shapes = []
    for input in batched_inputs:
        meta = {}
        c, h, w = input["image"].shape
        meta["img_shape"] = meta["ori_shape"] = (h, w, c)
        if rescale:
            scale_factor = np.sqrt(h * w / (input["height"] * input["width"]))
            ori_shape = (input["height"], input["width"])
            output_shapes.append(ori_shape)
            meta["ori_shape"] = ori_shape + (c,)
        else:
            scale_factor = 1.0
            output_shapes.append((h, w))
        meta["scale_factor"] = scale_factor
        meta["flip"] = False
        padh, padw = images.shape[-2:]
        meta["pad_shape"] = (padh, padw, c)
        metas.append(meta)

    if self.training:
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        if gt_instances[0].has("gt_masks"):
            from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks

            def convert_mask(m, shape):
                # mmdet mask format
                if isinstance(m, BitMasks):
                    return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1])
                else:
                    return mm_PolygonMasks(m.polygons, shape[0], shape[1])

            gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances]
        else:
            gt_masks = None
        losses_and_metrics = self.detector.forward_train(
            images,
            metas,
            [x.gt_boxes.tensor for x in gt_instances],
            [x.gt_classes for x in gt_instances],
            gt_masks=gt_masks,
        )
        return _parse_losses(losses_and_metrics)
    else:
        results = self.detector.simple_test(images, metas, rescale=rescale)
        results = [
            {"instances": _convert_mmdet_result(r, shape)}
            for r, shape in zip(results, output_shapes)
        ]
        return results
def test_roi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
    cfg.MODEL.MASK_ON = True

    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = Boxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_instance0.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5)
    gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = Boxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instance1.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5)
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, feature_shape)
    roi_heads = StandardROIHeads(cfg, feature_shape)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    detector_losses.update(proposal_losses)
    expected_losses = {
        "loss_cls": 4.5253729820251465,
        "loss_box_reg": 0.009785720147192478,
        "loss_mask": 0.693184494972229,
        "loss_rpn_cls": 0.08186662942171097,
        "loss_rpn_loc": 0.1104838103055954,
    }
    succ = all(
        torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0)))
        for name in detector_losses.keys()
    )
    self.assertTrue(
        succ,
        "Losses have changed! New losses: {}".format(
            {k: v.item() for k, v in detector_losses.items()}
        ),
    )
def test_StandardROIHeads_scriptability(self):
    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
    cfg.MODEL.MASK_ON = True
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.01
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01

    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    roi_heads = StandardROIHeads(cfg, feature_shape).eval()

    proposal0 = Instances(image_sizes[0])
    proposal_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    proposal0.proposal_boxes = Boxes(proposal_boxes0)
    proposal0.objectness_logits = torch.tensor([0.5, 0.7], dtype=torch.float32)

    proposal1 = Instances(image_sizes[1])
    proposal_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
    proposal1.proposal_boxes = Boxes(proposal_boxes1)
    proposal1.objectness_logits = torch.tensor([0.1, 0.9], dtype=torch.float32)
    proposals = [proposal0, proposal1]

    pred_instances, _ = roi_heads(images, features, proposals)

    fields = {
        "objectness_logits": torch.Tensor,
        "proposal_boxes": Boxes,
        "pred_classes": torch.Tensor,
        "scores": torch.Tensor,
        "pred_masks": torch.Tensor,
        "pred_boxes": Boxes,
        "pred_keypoints": torch.Tensor,
        "pred_keypoint_heatmaps": torch.Tensor,
    }
    with patch_instances(fields) as new_instances:
        proposal0 = new_instances.from_instances(proposal0)
        proposal1 = new_instances.from_instances(proposal1)
        proposals = [proposal0, proposal1]
        scripted_roi_heads = torch.jit.script(roi_heads)
        scripted_pred_instances, _ = scripted_roi_heads(images, features, proposals)

    for instance, scripted_instance in zip(pred_instances, scripted_pred_instances):
        assert_instances_allclose(instance, scripted_instance.to_instances(), rtol=0)
def preprocess_image(self, batched_inputs):
    # all models from detectron2 preprocess the images the same way
    # this could change in the future; fingers crossed
    # reference: https://github.com/facebookresearch/detectron2/tree/master/detectron2/modeling/meta_arch
    # last checked: 23.11.20
    images = [x["image"].to(self.model.device) for x in batched_inputs]
    images = [(x - self.model.pixel_mean) / self.model.pixel_std for x in images]
    images = ImageList.from_tensors(images, self.model.backbone.size_divisibility)
    return images
def _preprocess_image(self, batched_inputs):
    """
    Pad and batch the input images, and collect the per-image labels.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    labels = torch.LongTensor([x["label"] for x in batched_inputs]).to(self.device)
    images = ImageList.from_tensors(images)
    return images, labels
def preprocess_image(self, batched_inputs):
    """
    Normalize, pad and batch the input images.
    """
    images = [x.to(self.device) for x in batched_inputs]
    norms = [self.normalizer(x) for x in images]
    size = (norms[0].shape[1], norms[0].shape[2])
    images = ImageList.from_tensors(norms, self.backbone.size_divisibility)
    return images, size
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * "image": Tensor, image in (C, H, W) format.
            * "instances": Instances
            * "sem_seg": semantic segmentation ground truth.
            * Other information that's included in the original dicts, such as:
              "height", "width" (int): the output resolution of the model,
              used in inference. See :meth:`postprocess` for details.

    Returns:
        list[dict]: each dict has the results for one image. The dict contains the following keys:

        * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
        * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
        * "panoptic_seg": See the return value of
          :func:`combine_semantic_and_instance_outputs` for its format.
    """
    if not self.training:
        return self.inference(batched_inputs)

    images = self.preprocess_image(batched_inputs)
    features = self.backbone(images.tensor)

    assert "sem_seg" in batched_inputs[0]
    gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
    gt_sem_seg = ImageList.from_tensors(
        gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value
    ).tensor
    sem_seg_results, sem_seg_losses, feat, seg_score = self.sem_seg_head(features, gt_sem_seg)
    # del sem_seg_results

    gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
    detector_results, detector_losses, box_features = self.roi_heads(
        images, features, proposals, gt_instances
    )

    #############################
    # Graph op
    #############################
    instance, sem_seg_results, losses_graph = self.graph_connection(
        box_features,
        detector_results,
        features,
        feat,
        seg_score,
        gt_sem_seg,
    )

    losses = sem_seg_losses
    losses.update(proposal_losses)
    losses.update(detector_losses)
    losses.update(losses_graph)
    return losses
def preprocess_image(self, batched_inputs):
    """
    Normalize, pad and batch the input images.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    image_scales = [x["im_scale"] for x in batched_inputs]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    images.image_scales = image_scales
    return images
def preprocess_image(self, batched_inputs: Tuple[Dict[str, Tensor]]):
    """
    Normalize and batch the input images.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    # Convert uint8 images to float in [0, 1]; keep images that are already float
    # (the original filtering comprehension silently dropped them).
    images = [x.float().div(255) if x.dtype != torch.float else x for x in images]
    images = [(x - self.pixel_mean) / self.pixel_std for x in images]
    images = ImageList.from_tensors(images)
    return images
def repad_image_list(
    il: "ImageList", pad_value: float = 0.0, inplace: bool = True
) -> "ImageList":
    if not inplace:
        il = ImageList(il.tensor.clone().detach(), copy.deepcopy(il.image_sizes))
    for i in range(len(il.image_sizes)):
        h, w = il.image_sizes[i]
        # reset everything outside the valid (h, w) region to pad_value
        il.tensor[i, ..., h:, :] = pad_value
        il.tensor[i, ..., :, w:] = pad_value
    return il
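# Hedged usage example for repad_image_list above: re-pad an ImageList whose
# padded region may contain stale values, leaving the valid image regions untouched.
import torch
from detectron2.structures import ImageList

il = ImageList.from_tensors([torch.ones(3, 8, 8), torch.ones(3, 6, 4)], pad_value=1.0)
repad_image_list(il, pad_value=0.0)  # modifies il in place by default
# For the second image (valid size 6x4), rows >= 6 and columns >= 4 are now 0.0,
# while il.tensor[1, :, :6, :4] still holds the original ones.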
def preprocess_image(self, batched_inputs):
    """
    Normalize, pad and batch the input images.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [(x - self.pixel_mean) / self.pixel_std for x in images]
    if self.dynamic:
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    else:
        if self.training:
            min_size = self.input.MIN_SIZE_TRAIN
            max_size = self.input.MAX_SIZE_TRAIN
        else:
            min_size = self.input.MIN_SIZE_TEST
            max_size = self.input.MAX_SIZE_TEST
        min_size = min_size[0] if isinstance(min_size, tuple) else min_size
        images = ImageList.from_tensors(
            images,
            self.backbone.size_divisibility,
            max_height=min_size,
            max_width=max_size,
        )
    return images
def preprocess_image(self, batched_inputs, norm=True):
    """
    Normalize, pad and batch the input images.
    """
    images = [x["image"].to(self.device) for x in batched_inputs]
    if norm:
        images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, 512)
    images = images.to(self.device)
    return images
def test_rroi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)

    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, feature_shape)
    roi_heads = build_roi_heads(cfg, feature_shape)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    detector_losses.update(proposal_losses)
    expected_losses = {
        "loss_cls": 4.365657806396484,
        "loss_box_reg": 0.0015851043863222003,
        "loss_rpn_cls": 0.2427729219198227,
        "loss_rpn_loc": 0.3646621108055115,
    }
    succ = all(
        torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0)))
        for name in detector_losses.keys()
    )
    self.assertTrue(
        succ,
        "Losses have changed! New losses: {}".format(
            {k: v.item() for k, v in detector_losses.items()}
        ),
    )
def forward(self, batched_inputs):
    # complete image
    images = [x["image"].to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.inpaint_net.size_divisibility)

    # triplet input maps:
    # erased regions
    masks = [x["mask"].to(self.device) for x in batched_inputs]
    masks = ImageList.from_tensors(masks, self.inpaint_net.size_divisibility)
    # mask the input image with masks
    erased_ims = images.tensor * (1. - masks.tensor)
    # ones map
    ones_ims = [torch.ones_like(x["mask"].to(self.device)) for x in batched_inputs]
    ones_ims = ImageList.from_tensors(ones_ims, self.inpaint_net.size_divisibility)
    # the conv layers use zero padding; this is used to indicate the image boundary

    # generation process
    input_tensor = torch.cat([erased_ims, ones_ims.tensor, masks.tensor], dim=1)
    coarse_inp, fine_inp, offset_flow = self.inpaint_net(input_tensor, masks.tensor)
    # offset_flow is used to visualize

    if self.training:
        raise NotImplementedError
    else:
        processed_results = []
        inpainted_im = erased_ims * (1. - masks.tensor) + fine_inp * masks.tensor
        for result, input_per_image, image_size in zip(
            inpainted_im, batched_inputs, images.image_sizes
        ):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            # abuse semantic segmentation postprocess; it basically does some resizing
            r = sem_seg_postprocess(result, image_size, height, width)
            processed_results.append({"inpainted": r})
        return processed_results
def gather_instance_to_global_mask(pred_mask_logits, instances):
    cls_agnostic_mask = pred_mask_logits.size(1) == 1
    mask_size = pred_mask_logits.size()[-2:]
    assert cls_agnostic_mask
    gt_masks = []
    for ins_per_im in instances:
        # assert mask_size == ins_per_im.gt_masks.image_size
        gt_masks.append(ins_per_im.gt_masks.tensor.any(0)[None] != 0)
    # FIXME replace hard coded 32
    return ImageList.from_tensors(gt_masks, 32).tensor
def load(self, filenames: ManyPaths) -> ImagesWithSize:
    images = [{"file_name": str(f)} for f in filenames]
    images = [self.mapper(i) for i in images]
    from detectron2.structures import ImageList

    images = ImageList.from_tensors([image["image"] for image in images])
    return (
        images.tensor.float() / 256,
        torch.tensor(images.image_sizes, dtype=torch.int32),
    )  # type: ignore
def forward(self, inputs):
    if not self.training:
        return self.inference(inputs)
    images = [x["image"] for x in inputs]
    images = ImageList.from_tensors(images, 1)
    ret = self.conv(images.tensor)
    ret = self.bn(ret)
    ret = self.relu(ret)
    ret = self.avgpool(ret)
    return {"loss": ret.norm()}
def preprocess_image(self, batched_inputs: List[Dict[str, Tensor]]):
    """
    Normalize, pad and batch the input images.
    """
    images = [self._move_to_current_device(x["image"]) for x in batched_inputs]
    images = [(x - self.pixel_mean) / self.pixel_std for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    return images
def forward(self, images, features, gt_instances=None):
    """
    See :class:`RPN.forward`.
    """
    num_branch = self.num_branch if self.training or not self.trident_fast else 1
    # Duplicate images and gt_instances for all branches in TridentNet.
    all_images = ImageList(
        torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch
    )
    all_gt_instances = gt_instances * num_branch if gt_instances is not None else None

    return super(TridentRPN, self).forward(all_images, features, all_gt_instances)
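# Minimal sketch (assumption, not TridentNet code): duplicating an ImageList for
# N branches just repeats the batched tensor and the per-image size list, as done
# in TridentRPN.forward above.
import torch
from detectron2.structures import ImageList

num_branch = 3
images = ImageList.from_tensors([torch.rand(3, 32, 32), torch.rand(3, 20, 28)])
all_images = ImageList(
    torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch
)
assert len(all_images) == num_branch * len(images)  # 6 images in the duplicated list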
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * "image": Tensor, image in (C, H, W) format.
            * "instances": Instances
            * "sem_seg": semantic segmentation ground truth.
            * Other information that's included in the original dicts, such as:
              "height", "width" (int): the output resolution of the model,
              used in inference.

    Returns:
        list[dict]: each dict is the results for one image. The dict contains the following keys:

        * "instances": Instances results.
        * "sem_seg": Semantic Segmentation results.
        * "panoptic_seg": available when `MODEL.INFERENCE.COMBINE.ENABLE`.
          See the return value of :func:`combine_thing_and_stuff` for its format.
    """
    if self.export_onnx:
        print('[WARN] exporting onnx...')
        assert isinstance(batched_inputs, torch.Tensor) or isinstance(
            batched_inputs, list
        ), 'onnx export, batched_inputs only needs image tensor'
        images = batched_inputs
    else:
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)

    if self.export_onnx:
        features = self.backbone(images)
    else:
        features = self.backbone(images.tensor)

    encode_feat = self.semantic_fpn(features)
    encode_feat = self.feature_encoder(encode_feat)

    features_in = [features[_feat] for _feat in self.in_feature]
    pred_centers, pred_regions, pred_weights = multi_apply(
        self.forward_single_level, features_in
    )

    if self.training:
        gt_dict = self.get_ground_truth.generate(
            batched_inputs, images, pred_weights, encode_feat
        )
        return self.losses(pred_centers, pred_regions, pred_weights, encode_feat, gt_dict)
    else:
        if self.export_onnx:
            # return pred_centers, pred_regions, pred_weights
            return self.inference_onnx(
                batched_inputs, images, pred_centers, pred_regions, pred_weights, encode_feat
            )
        else:
            return self.inference(
                batched_inputs, images, pred_centers, pred_regions, pred_weights, encode_feat
            )
def forward(self, inputs):
    images = [x["image"] for x in inputs]
    images = ImageList.from_tensors(images, 1)
    ret = self.conv(images.tensor)
    losses = {"loss": ret.norm()}
    # run the same conv again
    ret1 = self.conv(images.tensor)
    losses["ret1"] = ret1.norm()
    return losses
def preprocess_image(self, batched_inputs):
    """
    Normalize, pad and batch the input images.
    """
    images = [x["image"].float().to(self.device) for x in batched_inputs]
    images = [self.normalizer(x) for x in images]
    images = ImageList.from_tensors(images, self.backbone.size_divisibility)
    return images
def test_rroi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    backbone = build_backbone(cfg)

    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    roi_heads = build_roi_heads(cfg, backbone.output_shape())

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    expected_losses = {
        "loss_cls": torch.tensor(4.381618499755859),
        "loss_box_reg": torch.tensor(0.0011829272843897343),
    }
    for name in expected_losses.keys():
        err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
            name, detector_losses[name], expected_losses[name]
        )
        self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name]), err_msg)