示例#1
0
 def forward(self, x, rel_matrix=None):
     """
     Run the part branch and the keypoint branch of the head.

     Args:
         x: input features fed through the stacked conv layers.
         rel_matrix: accepted but not read here; ``self.rel_matrix`` is
             what gets passed to the relation-embedding step.

     Returns:
         tuple: ``(keypoint_output, part_scores)``, both upsampled by
         ``self.up_scale``. ``part_scores`` has a sigmoid applied after
         upsampling; the keypoint output is returned without one.
     """
     num_convs = self.n_stacked_convs
     split = num_convs // 2

     # Part branch: first half of the conv stack.
     for idx in range(split):
         conv = getattr(self, self._get_layer_name(idx))
         x = F.relu(conv(x))
     part_logits = self.inter_part_score(x)

     # Relation embeddings are derived from the low-resolution part scores.
     rel_embs = self._forward_relation_embedding(
         F.sigmoid(part_logits), self.rel_matrix, self.word_emb
     )

     # The returned part scores are upsampled as logits and squashed afterwards.
     upsampled_part_logits = interpolate(
         part_logits, scale_factor=self.up_scale, mode="bilinear", align_corners=False
     )
     part_scores = F.sigmoid(upsampled_part_logits)

     # The embeddings go through every layer of the stack.
     for idx in range(num_convs):
         conv = getattr(self, self._get_layer_name(idx))
         rel_embs = F.relu(conv(rel_embs))

     # Keypoint branch: second half of the conv stack on the part features.
     for idx in range(split, num_convs):
         conv = getattr(self, self._get_layer_name(idx))
         x = F.relu(conv(x))

     fused = torch.cat([x, rel_embs], 1)
     kpt_logits = self.kpt_score(fused)
     kpt_logits = interpolate(
         kpt_logits, scale_factor=self.up_scale, mode="bilinear", align_corners=False
     )
     return kpt_logits, part_scores
示例#2
0
    def forward(self, features, rel_matrix=None):
        """
        Compute keypoint scores from relation embeddings, plus part scores.

        Args:
            features: input features for the first half of the conv stack.
            rel_matrix: accepted but not read here; ``self.kpt_rel_matrix``
                is what gets passed to the relation-embedding step.

        Returns:
            tuple: ``(kpt_scores, part_scores)``, both upsampled by
            ``self.up_scale``. ``part_scores`` is the upsampled output of
            ``self.kpt_score`` with channel 0 dropped (no softmax applied to
            the returned tensor), made contiguous.
        """
        num_convs = self.n_stacked_convs
        split = num_convs // 2

        # Part module: first half of the conv stack.
        x = features
        for idx in range(split):
            conv = getattr(self, self._get_layer_name(idx))
            x = F.relu(conv(x))

        part_logits = self.kpt_score(x)
        # Channel 0 is dropped from both the probabilities and the weights.
        part_probs = F.softmax(part_logits, dim=1)[:, 1:, :, :]
        rel_embs = self._forward_relation_embedding(
            x, part_probs, self.kpt_rel_matrix, self.kpt_score.weight[:, 1:, :, :]
        )

        upsampled_logits = interpolate(
            part_logits, scale_factor=self.up_scale, mode="bilinear", align_corners=False
        )
        part_scores = upsampled_logits[:, 1:, :, :]

        # Second half of the conv stack runs on the relation embeddings.
        for idx in range(split, num_convs):
            conv = getattr(self, self._get_layer_name(idx))
            rel_embs = F.relu(conv(rel_embs))

        kpt_scores = interpolate(
            self.final_kpt_score(rel_embs),
            scale_factor=self.up_scale,
            mode="bilinear",
            align_corners=False,
        )
        return kpt_scores, part_scores.contiguous()
示例#3
0
 def forward_for_mask(self, boxlists):
     """
     Predict per-instance masks with dynamically generated 1x1 convolutions.

     ``self.masks`` is extended with two coordinate channels (x and y, each
     scaled to [-1, 1]). For every image, each row of ``controllers`` is
     unpacked into a three-layer 1x1 conv network applied to those features.
     The first 169 entries of a row are read as::

         [0:80)     layer-1 weights (8 outputs x 10 inputs)
         [80:88)    layer-1 bias
         [88:152)   layer-2 weights (8 x 8)
         [152:160)  layer-2 bias
         [160:168)  layer-3 weights (1 x 8)
         [168:169)  layer-3 bias

     so ``self.masks`` plus the two coordinate channels must total 10 channels.

     Args:
         boxlists: one entry per image; each must provide ``image_size``
             (unpacked as ``(height, width)``) and ``controllers``.

     Returns:
         The same ``boxlists``; each entry gets ``pred_masks`` set to the
         sigmoid mask scores, upsampled by ``self.strides[0]`` and cropped to
         ``image_size``, with one leading entry per instance.
     """
     N, _, h, w = self.masks.shape
     # Build the coordinate grids on the same device as the mask features
     # instead of forcing CUDA, so this also works on CPU and on non-default
     # GPUs.
     device = self.masks.device
     xs = torch.arange(w, device=device).float() / (w - 1) * 2 - 1
     ys = torch.arange(h, device=device).float() / (h - 1) * 2 - 1
     x_map = xs.view(1, 1, 1, w).expand(N, 1, h, w)
     y_map = ys.view(1, 1, h, 1).expand(N, 1, h, w)
     masks_feat = torch.cat((self.masks, x_map, y_map), dim=1)

     o_h = int(h * self.strides[0])
     o_w = int(w * self.strides[0])
     for im in range(N):
         boxlist = boxlists[im]
         input_h, input_w = boxlist.image_size
         mask = masks_feat[None, im]
         controllers = boxlist.controllers
         ins_num = controllers.shape[0]

         # Unpack the flat controller rows into conv weights and biases.
         weights1 = controllers[:, :80].reshape(-1, 10, 1, 1)
         bias1 = controllers[:, 80:88].flatten()
         weights2 = controllers[:, 88:152].reshape(-1, 8, 1, 1)
         bias2 = controllers[:, 152:160].flatten()
         weights3 = controllers[:, 160:168].reshape(-1, 8, 1, 1)
         bias3 = controllers[:, 168:169].flatten()

         # Layer 1 is shared-input; layers 2 and 3 use one group per instance
         # so every instance only sees its own 8 channels.
         conv1 = F.conv2d(mask, weights1, bias1).relu()
         conv2 = F.conv2d(conv1, weights2, bias2, groups=ins_num).relu()
         masks_per_image = F.conv2d(conv2, weights3, bias3, groups=ins_num).sigmoid()

         masks = interpolate(
             masks_per_image, size=(o_h, o_w), mode="bilinear", align_corners=False
         )
         # Crop to the image size and move instances to the leading dimension.
         masks = masks[:, :, :input_h, :input_w].permute(1, 0, 2, 3)
         boxlist.pred_masks = masks
     return boxlists
示例#4
0
文件: modules.py 项目: zivzone/d2go
 def forward(self, x):
     """
     Upsample ``x`` by 2 with nearest-neighbour interpolation, then apply
     ``self.kps_score_lowres``.

     Args:
         x: input feature tensor.

     Returns:
         The output of ``self.kps_score_lowres`` on the upsampled features.
     """
     # ``align_corners`` only applies to the interpolating modes (linear,
     # bilinear, bicubic, trilinear); torch's ``interpolate`` raises
     # ValueError when it is set together with mode="nearest", so it is not
     # passed here.
     # NOTE(review): assumes ``layers.interpolate`` forwards to
     # ``torch.nn.functional.interpolate`` - confirm.
     x = layers.interpolate(x, scale_factor=(2, 2), mode="nearest")
     return self.kps_score_lowres(x)
示例#5
0
 def interp2d(input):
     """Bilinearly rescale ``input`` by the enclosing ``self.scale_factor``."""
     upsample_kwargs = dict(
         scale_factor=self.scale_factor,
         mode="bilinear",
         align_corners=False,
     )
     return interpolate(input, **upsample_kwargs)
示例#6
0
 def forward(self, features):
     """
     Upsample ``features`` by 2 (nearest neighbour) and apply the four
     low-resolution heads.

     Returns:
         tuple: ``((ann_index, index_uv, u, v), (None, None, None, None))``.
     """
     upsampled = interpolate(features, scale_factor=2, mode="nearest")
     heads = (
         self.ann_index_lowres,
         self.index_uv_lowres,
         self.u_lowres,
         self.v_lowres,
     )
     # Heads are applied in the same order as they appear in the result.
     outputs = tuple(head(upsampled) for head in heads)
     return outputs, (None, None, None, None)
示例#7
0
 def forward(self, x):
     """
     Apply ``self.kps_score_lowres`` and bilinearly upsample the result by
     ``self.up_scale``.
     """
     low_res_scores = self.kps_score_lowres(x)
     return layers.interpolate(
         low_res_scores,
         scale_factor=self.up_scale,
         mode="bilinear",
         align_corners=False,
     )
示例#8
0
 def forward(self, x):
     """
     Feed ``x`` through every layer obtained by iterating ``self``, then
     bilinearly upsample the result by ``self.up_scale``.
     """
     out = x
     for module in self:
         out = module(out)
     return interpolate(
         out, scale_factor=self.up_scale, mode="bilinear", align_corners=False
     )
示例#9
0
    def _subdivision_inference(self, features, mask_representations, instances):
        """
        Inference-only mask prediction by iterative subdivision.

        An initial mask is predicted on a regular grid of
        ``mask_point_subdivision_init_resolution`` points per side. Each of
        the following ``mask_point_subdivision_steps`` steps upsamples the
        mask by 2 and re-predicts the points selected from the uncertainty
        map. The final logits are handed to ``mask_rcnn_inference``.

        Returns:
            ``instances``, after ``mask_rcnn_inference`` has been called on them.
        """
        assert not self.training

        pred_boxes = [inst.pred_boxes for inst in instances]
        pred_classes = cat([inst.pred_classes for inst in instances])
        init_res = self.mask_point_subdivision_init_resolution

        mask_logits = None
        # One extra iteration: the first pass builds the coarsest mask from a
        # regular grid (mask_logits is still None); the remaining passes are
        # the actual subdivision steps.
        for _ in range(self.mask_point_subdivision_steps + 1):
            is_initial_step = mask_logits is None

            if is_initial_step:
                point_coords = generate_regular_grid_point_coords(
                    pred_classes.size(0), init_res, pred_boxes[0].device
                )
            else:
                mask_logits = interpolate(
                    mask_logits, scale_factor=2, mode="bilinear", align_corners=False
                )
                uncertainty_map = calculate_uncertainty(mask_logits, pred_classes)
                point_indices, point_coords = get_uncertain_point_coords_on_grid(
                    uncertainty_map, self.mask_point_subdivision_num_points
                )

            # Run the point head for every selected point.
            fine_grained_features = self._point_pooler(features, pred_boxes, point_coords)
            point_logits = self._get_point_logits(
                fine_grained_features, point_coords, mask_representations
            )

            if is_initial_step:
                # The regular-grid predictions become the initial mask.
                num_rois, num_channels, _ = point_logits.shape
                mask_logits = point_logits.reshape(num_rois, num_channels, init_res, init_res)
                # Subdivision cannot run on an empty set of boxes; finish now.
                if len(pred_classes) == 0:
                    break
                continue

            # Write the point predictions into the upsampled grid.
            num_rois, num_channels, height, width = mask_logits.shape
            point_indices = point_indices.unsqueeze(1).expand(-1, num_channels, -1)
            mask_logits = (
                mask_logits.reshape(num_rois, num_channels, height * width)
                .scatter_(2, point_indices, point_logits)
                .view(num_rois, num_channels, height, width)
            )

        mask_rcnn_inference(mask_logits, instances)
        return instances
示例#10
0
 def layers(self, x):
     """
     Apply each module in ``self.blocks`` followed by ReLU, score the result
     with ``self.score_lowres`` and bilinearly upsample by ``self.up_scale``.
     """
     for block in self.blocks:
         x = block(x)
         x = F.relu(x)
     low_res_scores = self.score_lowres(x)
     return interpolate(
         low_res_scores, scale_factor=self.up_scale, mode="bilinear", align_corners=False
     )
示例#11
0
 def interp2d(input):
     """
     Bilinearly rescale ``input`` by the enclosing ``self.scale_factor``;
     a scale factor of 1 returns ``input`` itself without interpolating.
     """
     if self.scale_factor == 1:
         return input
     rescaled = interpolate(
         input, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
     )
     return rescaled
示例#12
0
 def forward(self, x):
     """
     Apply each module in ``self.blocks`` followed by ReLU, score the result
     with ``self.score_lowres`` and bilinearly upsample by ``self.up_scale``.
     """
     for block in self.blocks:
         x = block(x)
         x = F.relu(x)
     low_res_scores = self.score_lowres(x)
     return interpolate(
         low_res_scores, scale_factor=self.up_scale, mode="bilinear", align_corners=False
     )
示例#13
0
 def forward(self, x):
     """
     Apply each module in ``self.conv_fcns`` followed by ReLU, score the
     result with ``self.score_lowres`` and bilinearly upsample by
     ``self.up_scale``.
     """
     for conv in self.conv_fcns:
         x = F.relu(conv(x))
     low_res_scores = self.score_lowres(x)
     return interpolate(
         low_res_scores, scale_factor=self.up_scale, mode="bilinear", align_corners=False
     )
示例#14
0
    def interp2d(self, tensor_nchw: torch.Tensor):
        """
        Upscale a tensor with bilinear interpolation.

        Args:
            tensor_nchw (tensor): tensor of shape (N, C, H, W)
        Return:
            tensor of shape (N, C, Hout, Wout), where Hout and Wout result
                from applying ``self.scale_factor`` to H and W
        """
        factor = self.scale_factor
        upscaled = interpolate(
            tensor_nchw, scale_factor=factor, mode="bilinear", align_corners=False
        )
        return upscaled
示例#15
0
 def forward_for_mask(self, boxlists):
     """
     Combine prototype masks with per-instance coefficients.

     ``self.protos`` is bilinearly upsampled by ``self.strides[0]``. For each
     image, every row of ``boxlist.coeffs`` weights the prototype channels,
     the weighted channels are summed, passed through a sigmoid and cropped
     to ``boxlist.image_size`` (unpacked as ``(height, width)``).

     Returns:
         The same ``boxlists``, with ``pred_masks`` set on each entry.
     """
     N, _, m_h, m_w = self.protos.shape
     out_size = (int(m_h * self.strides[0]), int(m_w * self.strides[0]))
     protos = interpolate(self.protos, size=out_size, mode="bilinear", align_corners=False)

     for im, proto in enumerate(protos):
         boxlist = boxlists[im]
         input_h, input_w = boxlist.image_size
         # (num_instances, dim) -> (num_instances, dim, 1, 1) so it broadcasts
         # against the (dim, H, W) prototypes.
         coeffs = boxlist.coeffs.unsqueeze(-1).unsqueeze(-1)
         weighted = coeffs * proto
         masks = weighted.sum(dim=1, keepdim=True).sigmoid()
         boxlist.pred_masks = masks[:, :, :input_h, :input_w]
     return boxlists
示例#16
0
    def forward(self, x, rel_matrix=None):
        """
        Predict keypoint heatmaps with relation-embedded deconvolution weights.

        Args:
            x: input features; an input of length 0 short-circuits to an
                empty ``(0, 0, 0, 0)`` tensor on the same device.
            rel_matrix: accepted but not read here; ``self.rel_matrix`` is used.

        Returns:
            The transposed-convolution output, bilinearly upsampled by
            ``self.up_scale``.
        """
        if len(x) == 0:
            # Nothing to predict for an empty batch.
            return torch.zeros(size=(0, 0, 0, 0), device=x.device)

        for idx in range(self.n_stacked_convs):
            conv = getattr(self, self._get_layer_name(idx))
            x = F.relu(conv(x))

        # The deconvolution weights are produced from the stored keypoint
        # weights and the relation matrix.
        kpt_weight = self._forward_relation_embedding(self.kpt_weight, self.rel_matrix)
        heatmaps = nn.functional.conv_transpose2d(
            x, weight=kpt_weight, bias=self.kpt_bias, padding=1, stride=2
        )
        return interpolate(
            heatmaps, scale_factor=self.up_scale, mode="bilinear", align_corners=False
        )
示例#17
0
 def prepare_masks(self, m_h, m_w, r_h, r_w, targets_masks):
     """
     Pad each image's masks to ``(r_h, r_w)`` and resize them to ``(m_h, m_w)``.

     Args:
         m_h, m_w: output mask height and width.
         r_h, r_w: height and width of the zero canvas each mask is pasted
             into (top-left aligned) before resizing.
         targets_masks: per-image mask tensors of shape ``(n, h, w)``;
             entries of length 0 are replaced by an empty tensor.

     Returns:
         list: one boolean tensor per image (bilinear resize, thresholded
         with ``> 0``), or an empty tensor for images without masks.
     """
     resized_per_image = []
     for mask_t in targets_masks:
         if len(mask_t) == 0:
             resized_per_image.append(mask_t.new_tensor([]))
             continue

         n, h, w = mask_t.shape
         canvas = mask_t.new_zeros((n, r_h, r_w))
         canvas[:, :h, :w] = mask_t

         # interpolate expects a batch dimension; add it and strip it again.
         batched = canvas.float().unsqueeze(0)
         resized = interpolate(
             input=batched, size=(m_h, m_w), mode="bilinear", align_corners=False
         )[0]
         resized_per_image.append(resized > 0)
     return resized_per_image
示例#18
0
def process_heatmaps(maps, rois, img_shapes):
    """
    Paste per-ROI keypoint confidence maps into full-image canvases.

    Each ROI heatmap is bicubically resized to the (ceiled) box size, turned
    into scores that are normalised over the pooled resolution, quantised to
    the 0-255 range and written into a zero canvas at the box position.

    Args:
        maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for
            each ROI and each keypoint.
        rois (Tensor): (#ROIs, 4). The box of each ROI; columns are read as (x0, y0, x1, y1).
        img_shapes: indexable per ROI; ``img_shapes[i][0]`` and ``img_shapes[i][1]`` are the
            canvas height and width used for ROI ``i``.
    Returns:
        list[Tensor]: one float tensor of shape (#keypoints, H_i, W_i) per ROI, holding
        integer-valued scores in [0, 255] inside the box region and zeros elsewhere.
    """
    num_rois, num_keypoints = maps.shape[:2]

    # Row/column of the top-left corner of each box on its canvas.
    offset_i = (rois[:, 1]).int()
    offset_j = (rois[:, 0]).int()

    widths_ceil = (rois[:, 2] - rois[:, 0]).clamp(min=1).ceil()
    heights_ceil = (rois[:, 3] - rois[:, 1]).clamp(min=1).ceil()

    roi_map_scores = [
        torch.zeros((num_keypoints, img_shapes[i][0], img_shapes[i][1]))
        for i in range(num_rois)
    ]

    for i in range(num_rois):
        out_h, out_w = int(heights_ceil[i]), int(widths_ceil[i])
        # #keypoints x H x W
        roi_map = interpolate(
            maps[[i]], size=(out_h, out_w), mode="bicubic", align_corners=False
        ).squeeze(0)

        # softmax over the spatial region
        max_score, _ = roi_map.view(num_keypoints, -1).max(1)
        max_score = max_score.view(num_keypoints, 1, 1)
        tmp_full_resolution = (roi_map - max_score).exp_()
        tmp_pool_resolution = (maps[i] - max_score).exp_()

        # Produce scores over the region H x W, but normalize with POOL_H x POOL_W,
        # so that the scores of objects of different absolute sizes will be more comparable
        scaled = (tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True)) * 255.0
        # The ratio can exceed 1 (the resized peak may be higher than anything in
        # the pooled map), so saturate at 255 before the uint8 cast instead of
        # letting the conversion overflow.
        norm_score = scaled.clamp_(max=255.0).to(torch.uint8)

        top, left = int(offset_i[i]), int(offset_j[i])
        roi_map_scores[i][:, top:top + out_h, left:left + out_w] = norm_score.float()

    return roi_map_scores
示例#19
0
def heatmaps_to_keypoints(maps: torch.Tensor,
                          rois: torch.Tensor) -> torch.Tensor:
    """
    Extract predicted keypoint locations from heatmaps.

    Args:
        maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W)
        rois (Tensor): (#ROIs, 4)

    Returns:
        Tensor of shape (#rois, #keypoints, 4) with the last dimension
        corresponding to (x, y, logit, prob) for each keypoint.

    A discrete image coordinate is converted to a continuous keypoint coordinate with the
    conversion from Heckbert 1990, to stay consistent with keypoints_to_heatmap:
    c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.
    """
    num_rois, num_keypoints = maps.shape[:2]

    box_x0 = rois[:, 0]
    box_y0 = rois[:, 1]
    box_w = (rois[:, 2] - rois[:, 0]).clamp(min=1)
    box_h = (rois[:, 3] - rois[:, 1]).clamp(min=1)
    box_w_ceil = box_w.ceil()
    box_h_ceil = box_h.ceil()

    # Heatmaps are resized to integer sizes; these factors map coordinates
    # on the resized grid back to the true (fractional) box extent.
    scale_x = box_w / box_w_ceil
    scale_y = box_h / box_h_ceil

    xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4)
    kp_range = torch.arange(num_keypoints, device=maps.device)

    for roi_idx in range(num_rois):
        out_hw = (int(box_h_ceil[roi_idx]), int(box_w_ceil[roi_idx]))
        # #keypoints x H x W
        roi_map = interpolate(
            maps[[roi_idx]], size=out_hw, mode="bicubic", align_corners=False
        ).squeeze(0)
        flat_map = roi_map.view(num_keypoints, -1)

        # softmax over the spatial region
        peak = flat_map.max(1)[0].view(num_keypoints, 1, 1)
        full_res_exp = (roi_map - peak).exp_()
        pool_res_exp = (maps[roi_idx] - peak).exp_()
        # Scores cover the region H x W but are normalized with POOL_H x POOL_W,
        # so that objects of different absolute sizes get more comparable scores
        roi_map_probs = full_res_exp / pool_res_exp.sum((1, 2), keepdim=True)

        # Row-major flat index of each keypoint's peak -> (row, column).
        map_w = roi_map.shape[2]
        flat_pos = flat_map.argmax(1)
        x_int = flat_pos % map_w
        y_int = (flat_pos - x_int) // map_w

        peak_probs = roi_map_probs[kp_range, y_int, x_int]
        assert (peak_probs == roi_map_probs.view(num_keypoints, -1).max(1)[0]).all()

        x = (x_int.float() + 0.5) * scale_x[roi_idx]
        y = (y_int.float() + 0.5) * scale_y[roi_idx]

        xy_preds[roi_idx, :, 0] = x + box_x0[roi_idx]
        xy_preds[roi_idx, :, 1] = y + box_y0[roi_idx]
        xy_preds[roi_idx, :, 2] = roi_map[kp_range, y_int, x_int]
        xy_preds[roi_idx, :, 3] = peak_probs

    return xy_preds
示例#20
0
    def _forward_mask_point(self, features, mask_coarse_logits, instances):
        """
        Forward logic of the mask point head.
        """
        if not self.mask_point_on:
            return {} if self.training else mask_coarse_logits

        mask_features_list = [features[k] for k in self.mask_point_in_features]
        features_scales = [self._feature_scales[k] for k in self.mask_point_in_features]

        if self.training:
            proposal_boxes = [x.proposal_boxes for x in instances]
            gt_classes = cat([x.gt_classes for x in instances])
            with torch.no_grad():
                point_coords = get_uncertain_point_coords_with_randomness(
                    mask_coarse_logits,
                    lambda logits: calculate_uncertainty(logits, gt_classes),
                    self.mask_point_train_num_points,
                    self.mask_point_oversample_ratio,
                    self.mask_point_importance_sample_ratio,
                )

            fine_grained_features, point_coords_wrt_image = point_sample_fine_grained_features(
                mask_features_list, features_scales, proposal_boxes, point_coords
            )
            coarse_features = point_sample(mask_coarse_logits, point_coords, align_corners=False)
            point_logits = self.point_head(fine_grained_features, coarse_features)
            return {
                "loss_mask_point": roi_mask_point_loss(
                    point_logits, instances, point_coords_wrt_image
                )
            }
        else:
            pred_boxes = [x.pred_boxes for x in instances]
            pred_classes = cat([x.pred_classes for x in instances])
            # The subdivision code will fail with the empty list of boxes
            if len(pred_classes) == 0:
                return mask_coarse_logits

            mask_logits = mask_coarse_logits.clone()
            for subdivions_step in range(self.mask_point_subdivision_steps):
                mask_logits = interpolate(
                    mask_logits, scale_factor=2, mode="bilinear", align_corners=False
                )
                # If `mask_point_subdivision_num_points` is larger or equal to the
                # resolution of the next step, then we can skip this step
                H, W = mask_logits.shape[-2:]
                if (
                    self.mask_point_subdivision_num_points >= 4 * H * W
                    and subdivions_step < self.mask_point_subdivision_steps - 1
                ):
                    continue
                uncertainty_map = calculate_uncertainty(mask_logits, pred_classes)
                point_indices, point_coords = get_uncertain_point_coords_on_grid(
                    uncertainty_map, self.mask_point_subdivision_num_points
                )
                fine_grained_features, _ = point_sample_fine_grained_features(
                    mask_features_list, features_scales, pred_boxes, point_coords
                )
                coarse_features = point_sample(
                    mask_coarse_logits, point_coords, align_corners=False
                )
                point_logits = self.point_head(fine_grained_features, coarse_features)

                # put mask point predictions to the right places on the upsampled grid.
                R, C, H, W = mask_logits.shape
                point_indices = point_indices.unsqueeze(1).expand(-1, C, -1)
                mask_logits = (
                    mask_logits.reshape(R, C, H * W)
                    .scatter_(2, point_indices, point_logits)
                    .view(R, C, H, W)
                )
            return mask_logits
示例#21
0
    def _forward_mask_point(self, features, mask_coarse_logits, instances):
        """
        Forward logic of the mask point head.
        """
        if not self.mask_point_on:
            return {} if self.training else mask_coarse_logits

        mask_features_list = [features[k] for k in self.mask_point_in_features]
        features_scales = [
            self._feature_scales[k] for k in self.mask_point_in_features
        ]

        if self.training:
            proposal_boxes = [x.proposal_boxes for x in instances]
            gt_classes = cat([x.gt_classes for x in instances])
            with torch.no_grad():
                point_coords = get_uncertain_point_coords_with_randomness(
                    mask_coarse_logits,
                    lambda logits: calculate_uncertainty(logits, gt_classes),
                    self.mask_point_train_num_points,
                    self.mask_point_oversample_ratio,
                    self.mask_point_importance_sample_ratio,
                )

            fine_grained_features, point_coords_wrt_image = point_sample_fine_grained_features(
                mask_features_list, features_scales, proposal_boxes,
                point_coords)
            coarse_features = point_sample(mask_coarse_logits,
                                           point_coords,
                                           align_corners=False)
            point_logits = self.point_head(fine_grained_features,
                                           coarse_features)
            return {
                "loss_mask_point":
                roi_mask_point_loss(point_logits, instances,
                                    point_coords_wrt_image)
            }
        else:
            pred_boxes = [x.pred_boxes for x in instances]
            pred_classes = cat([x.pred_classes for x in instances])
            # The subdivision code will fail with the empty list of boxes
            if len(pred_classes) == 0:
                return mask_coarse_logits

            mask_logits = None
            # +1 here to include an initial step to generate the coarsest mask
            # prediction with init_resolution, when mask_logits is None.
            # We compute initial mask by sampling on a regular grid. coarse_mask
            # can be used as initial mask as well, but it's typically very low-res
            # so it will be completely overwritten during subdivision anyway.
            for _ in range(self.mask_point_subdivision_steps + 1):
                if mask_logits is None:
                    point_coords = generate_regular_grid_point_coords(
                        pred_classes.size(0),
                        self.mask_point_subdivision_init_resolution,
                        pred_boxes[0].device,
                    )
                else:
                    mask_logits = interpolate(mask_logits,
                                              scale_factor=2,
                                              mode="bilinear",
                                              align_corners=False)
                    uncertainty_map = calculate_uncertainty(
                        mask_logits, pred_classes)
                    point_indices, point_coords = get_uncertain_point_coords_on_grid(
                        uncertainty_map,
                        self.mask_point_subdivision_num_points)

                # Run the point head for every point in point_coords
                fine_grained_features, _ = point_sample_fine_grained_features(
                    mask_features_list, features_scales, pred_boxes,
                    point_coords)
                coarse_features = point_sample(mask_coarse_logits,
                                               point_coords,
                                               align_corners=False)
                point_logits = self.point_head(fine_grained_features,
                                               coarse_features)

                if mask_logits is None:
                    # Create initial mask_logits using point_logits on this regular grid
                    R, C, _ = point_logits.shape
                    mask_logits = point_logits.reshape(
                        R,
                        C,
                        self.mask_point_subdivision_init_resolution,
                        self.mask_point_subdivision_init_resolution,
                    )
                else:
                    # Put point predictions to the right places on the upsampled grid.
                    R, C, H, W = mask_logits.shape
                    point_indices = point_indices.unsqueeze(1).expand(
                        -1, C, -1)
                    mask_logits = (mask_logits.reshape(R, C, H * W).scatter_(
                        2, point_indices, point_logits).view(R, C, H, W))
            return mask_logits
示例#22
0
def heatmaps_to_keypoints(maps: torch.Tensor,
                          rois: torch.Tensor) -> torch.Tensor:
    """
    Extract predicted keypoint locations from heatmaps.

    Args:
        maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for
            each ROI and each keypoint.
        rois (Tensor): (#ROIs, 4). The box of each ROI.

    Returns:
        Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to
        (x, y, logit, score) for each keypoint.

    Discrete pixel indices are converted to continuous keypoint coordinates with the
    conversion from Heckbert 1990, to stay consistent with :meth:`Keypoints.to_heatmap`:
    c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.
    """
    # Inputs are detached explicitly: the decorator form of torch.no_grad()
    # was not supported by torchscript.
    # https://github.com/pytorch/pytorch/pull/41371
    maps = maps.detach()
    rois = rois.detach()

    num_rois, num_keypoints = maps.shape[:2]

    box_x0 = rois[:, 0]
    box_y0 = rois[:, 1]
    box_w = (rois[:, 2] - rois[:, 0]).clamp(min=1)
    box_h = (rois[:, 3] - rois[:, 1]).clamp(min=1)
    box_w_ceil = box_w.ceil()
    box_h_ceil = box_h.ceil()

    # Heatmaps are resized to integer sizes; these factors map coordinates
    # on the resized grid back to the true (fractional) box extent.
    scale_x = box_w / box_w_ceil
    scale_y = box_h / box_h_ceil

    xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4)
    kp_range = torch.arange(num_keypoints, device=maps.device)

    for roi_idx in range(num_rois):
        out_hw = (int(box_h_ceil[roi_idx]), int(box_w_ceil[roi_idx]))
        # #keypoints x H x W
        roi_map = interpolate(
            maps[[roi_idx]], size=out_hw, mode="bicubic", align_corners=False
        ).squeeze(0)
        flat_map = roi_map.view(num_keypoints, -1)

        # softmax over the spatial region
        peak = flat_map.max(1)[0].view(num_keypoints, 1, 1)
        full_res_exp = (roi_map - peak).exp_()
        pool_res_exp = (maps[roi_idx] - peak).exp_()
        # Scores cover the region H x W but are normalized with POOL_H x POOL_W,
        # so that objects of different absolute sizes get more comparable scores
        roi_map_scores = full_res_exp / pool_res_exp.sum((1, 2), keepdim=True)

        # Row-major flat index of each keypoint's peak -> (row, column).
        map_w = roi_map.shape[2]
        flat_pos = flat_map.argmax(1)
        x_int = flat_pos % map_w
        y_int = (flat_pos - x_int) // map_w

        peak_scores = roi_map_scores[kp_range, y_int, x_int]
        assert (peak_scores == roi_map_scores.view(num_keypoints, -1).max(1)[0]).all()

        x = (x_int.float() + 0.5) * scale_x[roi_idx]
        y = (y_int.float() + 0.5) * scale_y[roi_idx]

        xy_preds[roi_idx, :, 0] = x + box_x0[roi_idx]
        xy_preds[roi_idx, :, 1] = y + box_y0[roi_idx]
        xy_preds[roi_idx, :, 2] = roi_map[kp_range, y_int, x_int]
        xy_preds[roi_idx, :, 3] = peak_scores

    return xy_preds