def _postprocess(results, result_mask_info, output_height, output_width, mask_threshold=0.5):
    """
    Post-process raw TensorMask outputs to the desired output resolution.

    Detectors usually see a resized image, so outputs are often needed at a
    different resolution than the detector's input. This rescales the
    predicted boxes (and, when present, the mask anchors) from the detector's
    input resolution to ``(output_height, output_width)``, drops detections
    whose boxes become empty, and pastes the per-detection masks into the
    output image.

    Args:
        results (Instances): raw detector outputs; ``results.image_size`` is
            the input resolution the detector saw. May be modified in-place.
            Does not carry ``pred_masks`` — those come via ``result_mask_info``.
        result_mask_info (list[Tensor], Boxes): a pair: a list of #detection
            predicted-mask tensors, and the anchors those masks correspond to.
        output_height, output_width: the desired output resolution.
        mask_threshold (float): binarization threshold used when pasting masks.

    Returns:
        Instances: the post-processed output at the output resolution.
    """
    input_h, input_w = results.image_size
    scale_x = output_width / input_w
    scale_y = output_height / input_h

    # Re-wrap the fields in an Instances object carrying the output resolution.
    results = Instances((output_height, output_width), **results.get_fields())

    # Rescale box coordinates in-place: even columns are x, odd columns are y.
    boxes = results.pred_boxes
    boxes.tensor[:, 0::2] *= scale_x
    boxes.tensor[:, 1::2] *= scale_y
    boxes.clip(results.image_size)

    # Drop detections whose boxes became empty after clipping.
    keep = boxes.nonempty()
    results = results[keep]

    result_masks, result_anchors = result_mask_info
    if result_masks:
        # Anchors live in the same coordinate space as the boxes — rescale them too.
        result_anchors.tensor[:, 0::2] *= scale_x
        result_anchors.tensor[:, 1::2] *= scale_y
        kept_masks = [m for keep_flag, m in zip(keep.tolist(), result_masks) if keep_flag]
        results.pred_masks = _paste_mask_lists_in_image(
            kept_masks,
            result_anchors[keep],
            results.image_size,
            threshold=mask_threshold,
        )
    return results
def detector_postprocess(results, output_height, output_width, mask_threshold=0.5):
    """
    Resize the output instances.

    The input images are often resized when entering an object detector. As a
    result, we often need the outputs of the detector in a different
    resolution from its inputs. This function resizes the raw outputs of an
    R-CNN detector to produce outputs according to the desired output
    resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the
            detector sees. This object might be modified in-place.
        output_height, output_width: the desired output resolution.
        mask_threshold (float): binarization threshold used when pasting masks.

    Returns:
        Instances: the resized output from the model, based on the output
        resolution.

    Raises:
        ValueError: if `results` contains neither "pred_boxes" nor
            "proposal_boxes".
    """
    scale_x, scale_y = (
        output_width / results.image_size[1],
        output_height / results.image_size[0],
    )
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        # BUGFIX: previously this fell through with `output_boxes` unbound and
        # crashed below with a confusing NameError; fail loudly instead.
        raise ValueError("Predictions must contain 'pred_boxes' or 'proposal_boxes'!")
    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    # Keep only instances whose boxes are non-empty after clipping.
    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = paste_masks_in_image(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        # Keypoints are (x, y, score); only the coordinates are rescaled.
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
def postprocess(self, results, output_height, output_width, resized_in_h,
                resized_in_w, padded_im_h, padded_im_w):
    """
    Map raw detections from the resized/padded image space back to the
    source image space.

    Args:
        results (Instances): raw detector outputs in resized/padded space.
        output_height, output_width: desired (source image) resolution.
        resized_in_h, resized_in_w: resolution of the resized (pre-padding) image.
        padded_im_h, padded_im_w: resolution of the padded image the model saw.

    Returns:
        Instances: detections at the output resolution, with binarized
        ``pred_masks`` when global mask logits are present.
    """
    scale_x = output_width / resized_in_w
    scale_y = output_height / resized_in_h

    # Gather detection results into an Instances at the output resolution.
    results = Instances((output_height, output_width), **results.get_fields())

    # Scale boxes from resized/padded image space to source image space, then clip.
    det_boxes = results.pred_boxes
    det_boxes.scale(scale_x, scale_y)
    det_boxes.clip(results.image_size)

    # Filter out detections that are empty in source image space.
    results = results[det_boxes.nonempty()]

    if results.has("pred_global_logits"):
        logits = results.pred_global_logits
        mask_h, mask_w = logits.shape[-2:]
        factor_h = padded_im_h // mask_h
        factor_w = padded_im_w // mask_w
        assert factor_h == factor_w

        # Aligned upsample of the instance masks to the resized/padded image shape.
        masks = aligned_bilinear(logits.sigmoid(), factor_h)
        # Strip the padding region to recover the resized-image extent.
        masks = masks[:, :, :resized_in_h, :resized_in_w]

        # Scale masks from the resized image shape to the source image shape.
        # This inverts an OpenCV/PIL-style resize, where align_corners is False.
        masks = F.interpolate(
            masks,
            size=(output_height, output_width),
            mode="bilinear",
            align_corners=False,
        )
        masks = masks[:, 0, :, :]

        # Binarize, filtering out mask pixels with low confidence score.
        results.pred_masks = masks > self.infer_mask_threshold
    return results