示例#1
0
def _postprocess(results,
                 result_mask_info,
                 output_height,
                 output_width,
                 mask_threshold=0.5):
    """
    Rescale raw TensorMask detections to the requested output resolution.

    Detectors usually run on a resized copy of the image, so their boxes and
    masks have to be mapped back to the resolution the caller asks for.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` is the (height, width) the detector saw.
            The tensor behind `results.pred_boxes` is scaled with in-place
            operations, so the caller's object might be modified. It does not
            carry `pred_masks`; masks arrive through `result_mask_info`.
        result_mask_info (list[Tensor], Boxes): a `(masks, anchors)` pair.
            The first item is a list with one predicted-mask tensor per
            detection; the second holds the anchors matching those masks.
            When the mask list is non-empty, the anchors' tensor is scaled
            with in-place operations as well.
        output_height, output_width: the desired output resolution.
        mask_threshold: forwarded as `threshold` to
            `_paste_mask_lists_in_image`.

    Returns:
        Instances: the detections expressed in the output resolution, with
        empty boxes dropped and `pred_masks` set when masks were supplied.
    """
    # Ratios are taken against the size the detector saw, before the
    # instances are re-wrapped at the output size.
    input_size = results.image_size
    ratio_w = output_width / input_size[1]
    ratio_h = output_height / input_size[0]
    rescaled = Instances((output_height, output_width), **results.get_fields())

    # Even columns are multiplied by the width ratio, odd columns by the
    # height ratio (presumably x1/x2 and y1/y2 of XYXY boxes - confirm).
    boxes = rescaled.pred_boxes
    boxes.tensor[:, 0::2] *= ratio_w
    boxes.tensor[:, 1::2] *= ratio_h
    boxes.clip(rescaled.image_size)

    keep = boxes.nonempty()
    rescaled = rescaled[keep]

    masks, anchors = result_mask_info
    if not masks:
        # Nothing to paste: return box-only detections.
        return rescaled

    anchors.tensor[:, 0::2] *= ratio_w
    anchors.tensor[:, 1::2] *= ratio_h

    # The masks live in a plain Python list, so they are filtered by walking
    # the boolean keep-flags rather than by tensor indexing.
    kept_masks = []
    for is_kept, mask in zip(keep.tolist(), masks):
        if is_kept:
            kept_masks.append(mask)

    rescaled.pred_masks = _paste_mask_lists_in_image(
        kept_masks,
        anchors[keep],
        rescaled.image_size,
        threshold=mask_threshold,
    )
    return rescaled
示例#2
0
def detector_postprocess(results,
                         output_height,
                         output_width,
                         mask_threshold=0.5):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place. It must contain either
            `pred_boxes` or `proposal_boxes`; `pred_boxes` wins when both exist.
        output_height, output_width: the desired output resolution.
        mask_threshold: forwarded as `threshold` to `paste_masks_in_image`
            when `pred_masks` is present.

    Returns:
        Instances: the resized output from the model, based on the output resolution

    Raises:
        ValueError: if `results` has neither `pred_boxes` nor `proposal_boxes`.
    """
    # Pick the box field before doing any work, so a box-less input fails with
    # a clear message instead of an UnboundLocalError further down.
    if results.has("pred_boxes"):
        box_field = "pred_boxes"
    elif results.has("proposal_boxes"):
        box_field = "proposal_boxes"
    else:
        raise ValueError("Predictions must contain boxes!")

    # Ratios are computed against the size the detector saw, before the
    # instances are re-wrapped at the output size.
    scale_x, scale_y = (output_width / results.image_size[1],
                        output_height / results.image_size[0])
    results = Instances((output_height, output_width), **results.get_fields())

    output_boxes = getattr(results, box_field)
    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    # Drop detections whose box is empty after scaling and clipping.
    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = paste_masks_in_image(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        # Index 0 of the last axis is scaled by the width ratio and index 1 by
        # the height ratio; any further entries are left untouched.
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
示例#3
0
    def postprocess(self, results, output_height, output_width, resized_in_h,
                    resized_in_w, padded_im_h, padded_im_w):
        """Map detections, and global masks if present, to the output size.

        Args:
            results: detector outputs exposing `get_fields()` and a
                `pred_boxes` field (presumably an `Instances` - confirm).
            output_height, output_width: the desired output resolution.
            resized_in_h, resized_in_w: size of the resized network input.
                Used for the box scale ratios and to crop the upsampled masks.
            padded_im_h, padded_im_w: size of the padded network input.
                Divided by the mask logit size to get the upsampling factor.

        Returns:
            Instances sized `(output_height, output_width)` with scaled,
            clipped boxes and empty boxes removed. If the field
            `pred_global_logits` exists, `pred_masks` is set to the resized
            mask probabilities compared against `self.infer_mask_threshold`.
        """
        ratio_w = output_width / resized_in_w
        ratio_h = output_height / resized_in_h

        # Re-wrap the fields at the output size, then move the boxes there.
        detections = Instances((output_height, output_width),
                               **results.get_fields())
        boxes = detections.pred_boxes
        boxes.scale(ratio_w, ratio_h)
        boxes.clip(detections.image_size)

        # Discard detections whose box is empty in the output image space.
        detections = detections[boxes.nonempty()]

        if not detections.has("pred_global_logits"):
            return detections

        logits = detections.pred_global_logits
        mask_h, mask_w = logits.shape[-2:]
        factor = padded_im_h // mask_h
        # The upsampling factor must be the same along both axes.
        assert factor == padded_im_w // mask_w

        # Upsample the mask probabilities, then keep only the top-left
        # resized_in_h x resized_in_w region.
        probs = aligned_bilinear(logits.sigmoid(), factor)
        probs = probs[:, :, :resized_in_h, :resized_in_w]

        # Resize to the output size with align_corners=False; the original
        # author notes this inverts OpenCV / PIL style interpolation.
        probs = F.interpolate(
            probs,
            size=(output_height, output_width),
            mode="bilinear",
            align_corners=False,
        )

        # Keep channel 0 and binarise against the confidence threshold.
        detections.pred_masks = probs[:, 0, :, :] > self.infer_mask_threshold
        return detections