Пример #1
0
def detector_postprocess(results,
                         output_height,
                         output_width,
                         mask_threshold=0.5):
    """
    Resize the output instances.

    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place: its fields are handed
            to the new `Instances` as-is, and boxes / keypoints are then
            scaled with in-place operations.
        output_height, output_width: the desired output resolution.
        mask_threshold (float): forwarded as `threshold` to
            `paste_masks_in_image` when `pred_masks` is present.

    Returns:
        Instances: the resized output from the model, based on the output resolution

    Raises:
        ValueError: if `results` has neither a `pred_boxes` nor a
            `proposal_boxes` field, since there is nothing to rescale
            or filter on.
    """
    # image_size is indexed as (height, width).
    input_height, input_width = results.image_size[0], results.image_size[1]
    scale_x = output_width / input_width
    scale_y = output_height / input_height

    # Re-wrap the same fields under the new (output) image size.
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "detector_postprocess requires `results` to contain either "
            "'pred_boxes' or 'proposal_boxes'."
        )

    # Bring the boxes into the output resolution and keep them inside it.
    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    # Drop instances whose box became empty after scaling/clipping.
    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = paste_masks_in_image(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        # Last-axis index 0 is scaled with x, index 1 with y; any further
        # entries on that axis are left untouched.
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
Пример #2
0
    def postprocess(self, results, output_height, output_width, resized_in_h,
                    resized_in_w, padded_im_h, padded_im_w):
        """
        Map detections (and global masks, when present) to the output size.

        Args:
            results (Instances): raw detections. Must provide `pred_boxes`;
                `pred_global_logits` is optional.
            output_height, output_width: the desired output resolution.
            resized_in_h, resized_in_w: size of the resized network input,
                used for the box scale factors and to crop the upsampled
                masks.
            padded_im_h, padded_im_w: size of the padded network input,
                used to derive the mask upsampling factor.

        Returns:
            Instances: detections with boxes scaled and clipped to the
            output resolution, empty boxes removed, and `pred_masks` set
            when `pred_global_logits` was available.
        """
        out_size = (output_height, output_width)
        ratio_w = output_width / resized_in_w
        ratio_h = output_height / resized_in_h

        # Re-wrap the detection fields under the output image size.
        results = Instances(out_size, **results.get_fields())

        # Rescale boxes to the output resolution, clip them to the image,
        # then discard detections whose box ended up empty.
        boxes = results.pred_boxes
        boxes.scale(ratio_w, ratio_h)
        boxes.clip(results.image_size)
        results = results[boxes.nonempty()]

        if not results.has("pred_global_logits"):
            return results

        logits = results.pred_global_logits
        logit_h, logit_w = logits.shape[-2:]
        up_h = padded_im_h // logit_h
        up_w = padded_im_w // logit_w
        # A single integer factor is used for both axes below.
        assert up_h == up_w

        # Upsample mask probabilities by the integer factor (presumably up
        # to the padded-input size -- confirm against `aligned_bilinear`),
        # then crop away the padding region.
        masks = aligned_bilinear(logits.sigmoid(), up_h)
        masks = masks[:, :, :resized_in_h, :resized_in_w]

        # Resize from the resized-input size to the output size.
        # align_corners=False is intended to mirror the OpenCV / PIL
        # interpolation convention.
        masks = F.interpolate(masks,
                              size=out_size,
                              mode="bilinear",
                              align_corners=False)

        # Keep channel 0 and binarize with the inference mask threshold.
        results.pred_masks = masks[:, 0, :, :] > self.infer_mask_threshold
        return results