Example #1
        def f(batched_inputs, c2_inputs, c2_results):
            image_sizes = [[int(im[0]), int(im[1])]
                           for im in c2_inputs["im_info"]]

            num_features = len(
                [x for x in c2_results.keys() if x.startswith("box_cls_")])
            pred_logits = [
                c2_results["box_cls_{}".format(i)] for i in range(num_features)
            ]
            pred_anchor_deltas = [
                c2_results["box_delta_{}".format(i)]
                for i in range(num_features)
            ]

            # For each feature level, the feature map should have the same batch size
            # and spatial dimensions as box_cls and box_delta.
            dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits]
            anchors = self.anchor_generator(dummy_features)

            # self.num_classes can be inferred
            self.num_classes = pred_logits[0].shape[1] // (
                pred_anchor_deltas[0].shape[1] // 4)

            pred_logits = [
                permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits
            ]
            pred_anchor_deltas = [
                permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas
            ]

            results = self.inference(anchors, pred_logits, pred_anchor_deltas,
                                     image_sizes)
            return meta_arch.GeneralizedRCNN._postprocess(
                results, batched_inputs, image_sizes)
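
The num_classes inference above can be checked in isolation. A minimal sketch with dummy tensors (the anchor count and class count are made-up values, not from the original code): with A anchors per location and K classes, box_cls has A*K channels and box_delta has A*4 channels, so K = (A*K) // ((A*4) // 4).

import torch

A, K = 9, 80                                   # assumed: 9 anchors per location, 80 classes
box_cls = torch.zeros(1, A * K, 32, 32)        # (N, A*K, H, W)
box_delta = torch.zeros(1, A * 4, 32, 32)      # (N, A*4, H, W)
num_classes = box_cls.shape[1] // (box_delta.shape[1] // 4)
assert num_classes == K                        # 720 // (36 // 4) == 80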
Example #2
    def produce_raw_output(self, anchors, features):
        """
        Given anchors and features, produces raw pre-nms output to be used for custom fusion operations.
        """
        # Perform inference run
        pred_logits, pred_anchor_deltas, pred_logits_vars, pred_anchor_deltas_vars = self.head(
            features)

        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [
            permute_to_N_HWA_K(
                x, self.num_classes) for x in pred_logits]
        pred_anchor_deltas = [
            permute_to_N_HWA_K(
                x, 4) for x in pred_anchor_deltas]

        if pred_logits_vars is not None:
            pred_logits_vars = [
                permute_to_N_HWA_K(
                    x, self.num_classes) for x in pred_logits_vars]
        if pred_anchor_deltas_vars is not None:
            pred_anchor_deltas_vars = [permute_to_N_HWA_K(
                x, self.bbox_cov_dims) for x in pred_anchor_deltas_vars]

        # Create raw output dictionary
        raw_output = {'anchors': anchors}

        # Shapes (per feature level, with R = Hi x Wi x A):
        # (N, R, K) for box_cls and box_cls_var.
        # (N, R, 4) and (N, R, bbox_cov_dims) for box_delta and box_reg_var respectively.
        raw_output.update({'box_cls': pred_logits,
                           'box_delta': pred_anchor_deltas,
                           'box_cls_var': pred_logits_vars,
                           'box_reg_var': pred_anchor_deltas_vars})
        return raw_output
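
Every snippet in this collection relies on the same permute_to_N_HWA_K helper. For reference, detectron2's implementation is essentially the following (reproduced from memory, so treat it as a sketch rather than the authoritative source):

def permute_to_N_HWA_K(tensor, K):
    """
    Transpose/reshape a tensor from (N, A x K, H, W) to (N, H x W x A, K).
    """
    assert tensor.dim() == 4, tensor.shape
    N, _, H, W = tensor.shape
    tensor = tensor.view(N, -1, K, H, W)       # (N, A, K, H, W)
    tensor = tensor.permute(0, 3, 4, 1, 2)     # (N, H, W, A, K)
    return tensor.reshape(N, -1, K)            # (N, H*W*A, K)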
Example #3
    def convert_outputs(self, batched_inputs, inputs, results):
        output_names = self.get_output_names()
        assert len(results) == len(output_names)

        m_results = {}
        for k, v in results.items():
            assert k in output_names, k
            m_results[k] = v.to(self._ns.device)

        image_sizes = inputs["image_sizes"]

        num_features = len(
            [x for x in m_results.keys() if x.startswith("box_cls_")])
        pred_logits = [
            m_results["box_cls_{}".format(i)] for i in range(num_features)
        ]
        pred_anchor_deltas = [
            m_results["box_delta_{}".format(i)] for i in range(num_features)
        ]

        # generate anchors from anchor_generator
        anchors = self._anchor_generator(pred_logits)

        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [
            permute_to_N_HWA_K(x, self._ns.num_classes) for x in pred_logits
        ]
        pred_anchor_deltas = [
            permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas
        ]
        results = self._ns.inference(anchors, pred_logits, pred_anchor_deltas,
                                     image_sizes)
        return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs,
                                                      image_sizes)

    def convert_outputs(self, batched_inputs, inputs, results):
        assert isinstance(self._wrapped_model, meta_arch.RetinaNet)
        image_sizes = inputs["image_sizes"]

        num_features = len(
            [x for x in results.keys() if x.startswith("box_cls_")])
        pred_logits = [
            results["box_cls_{}".format(i)] for i in range(num_features)
        ]
        pred_anchor_deltas = [
            results["box_delta_{}".format(i)] for i in range(num_features)
        ]

        # generate anchors from wrapped_model anchor_generator
        anchors = self._wrapped_model.anchor_generator(pred_logits)

        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [
            permute_to_N_HWA_K(x, self._wrapped_model.num_classes)
            for x in pred_logits
        ]
        pred_anchor_deltas = [
            permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas
        ]
        results = self._wrapped_model.inference(anchors, pred_logits,
                                                pred_anchor_deltas,
                                                image_sizes)
        return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs,
                                                      image_sizes)

    def inference(self, box_cls, box_delta, landmark_delta, anchors, image_sizes):
        """
        Arguments:
            box_cls, box_delta, landmark_delta: Same as the output of 
                :meth:`RetinaNetHead.forward`
            anchors (list[Boxes]): A list of #feature level Boxes.
                The Boxes contain anchors of this image on the specific feature level.
            image_sizes (List[torch.Size]): the input image sizes

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        results = []

        box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls]
        box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta]
        landmark_delta = [permute_to_N_HWA_K(x, 10) for x in landmark_delta]
        # list[Tensor], one per level, each has shape (N, Hi x Wi x A, K or 4 or 10)

        for img_idx, image_size in enumerate(image_sizes):
            box_cls_per_image = [box_cls_per_level[img_idx] for box_cls_per_level in box_cls]
            box_reg_per_image = [box_reg_per_level[img_idx] for box_reg_per_level in box_delta]
            landmark_reg_per_image = [landmark_reg_per_level[img_idx] for landmark_reg_per_level in landmark_delta]
            results_per_image = self.inference_single_image(
                box_cls_per_image, box_reg_per_image, landmark_reg_per_image,
                anchors, tuple(image_size)
            )
            results.append(results_per_image)
        return results

def permute_all_cls_box_landmark_to_N_HWA_K_and_concat(box_cls,
                                                       box_delta,
                                                       landmark_delta,
                                                       num_classes=80):
    """
    Rearrange the tensor layout from the network output, i.e.:
    list[Tensor]: #lvl tensors of shape (N, A x K, Hi, Wi)
    to per-image predictions, i.e.:
    Tensor: of shape (N x sum(Hi x Wi x A), K)
    """
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_delta
    box_cls_flattened = [permute_to_N_HWA_K(x, num_classes) for x in box_cls]
    box_delta_flattened = [permute_to_N_HWA_K(x, 4) for x in box_delta]
    landmark_delta_flattened = [
        permute_to_N_HWA_K(x, 10) for x in landmark_delta
    ]
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    box_cls = cat(box_cls_flattened, dim=1).view(-1, num_classes)
    box_delta = cat(box_delta_flattened, dim=1).view(-1, 4)
    landmark_delta = cat(landmark_delta_flattened, dim=1).view(-1, 10)
    return box_cls, box_delta, landmark_delta
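
A small shape check for the flattening above. This is a standalone sketch: it uses torch.cat in place of detectron2's cat wrapper and the permute_to_N_HWA_K helper shown earlier, and all sizes are made up.

import torch

N, A, K = 2, 9, 80
levels = [(32, 32), (16, 16)]                  # two dummy feature levels
box_cls = [torch.zeros(N, A * K, h, w) for h, w in levels]
box_delta = [torch.zeros(N, A * 4, h, w) for h, w in levels]
landmark_delta = [torch.zeros(N, A * 10, h, w) for h, w in levels]

flat_cls = torch.cat([permute_to_N_HWA_K(x, K) for x in box_cls], dim=1).view(-1, K)
flat_box = torch.cat([permute_to_N_HWA_K(x, 4) for x in box_delta], dim=1).view(-1, 4)
flat_lmk = torch.cat([permute_to_N_HWA_K(x, 10) for x in landmark_delta], dim=1).view(-1, 10)

R = sum(h * w * A for h, w in levels)          # total anchors over all levels
assert flat_cls.shape == (N * R, K)
assert flat_box.shape == (N * R, 4)
assert flat_lmk.shape == (N * R, 10)           # 5 landmarks x 2 coordinates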
Example #7
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)
        features = [features[f] for f in self.in_features]

        anchors = self.anchor_generator(features)
        pred_logits, pred_anchor_deltas = self.head(features)
        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits]
        pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas]

        if self.training:
            assert "instances" in batched_inputs[0], "Instance annotations are missing in training!"
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]

            gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances)
            losses = self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes)

            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    results = self.inference(
                        anchors, pred_logits, pred_anchor_deltas, images.image_sizes
                    )
                    self.visualize_training(batched_inputs, results)

            return losses
        else:
            results = self.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes
            ):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
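
A minimal usage sketch for the forward() above. The model variable, the image values, and the sizes are placeholders, not from the original code; in eval mode the method returns one dict of post-processed Instances per input image.

import torch

model.eval()                                   # assumed: an instance of the RetinaNet class above
image = torch.rand(3, 480, 640) * 255          # (C, H, W) in the model's expected pixel format
batched_inputs = [{"image": image, "height": 480, "width": 640}]
with torch.no_grad():
    outputs = model(batched_inputs)            # one dict per image
print(outputs[0]["instances"].pred_boxes)      # boxes at the requested 480x640 resolution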
Example #8
def retinanet_postprocess(torch_model, images, results):
    features = results[:5]
    pred_logits = results[5:10]
    pred_anchor_deltas = results[10:]

    anchors = torch_model.anchor_generator(features)
    pred_logits = [permute_to_N_HWA_K(x, torch_model.num_classes) for x in pred_logits]
    pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas]
    results = torch_model.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes)

    return results
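
retinanet_postprocess expects results as a flat sequence of 15 tensors: 5 FPN feature maps, then 5 box_cls maps, then 5 box_delta maps. A hypothetical wrapper producing outputs in that order from a detectron2-style RetinaNet might look like the sketch below; the attribute names (backbone, in_features, head) follow Example #7 and may differ between detectron2 versions.

import torch

class FlatRetinaNet(torch.nn.Module):
    """Hypothetical wrapper returning features + logits + deltas as one flat list."""

    def __init__(self, torch_model):
        super().__init__()
        self.torch_model = torch_model

    def forward(self, images_tensor):
        features = self.torch_model.backbone(images_tensor)
        features = [features[f] for f in self.torch_model.in_features]
        pred_logits, pred_anchor_deltas = self.torch_model.head(features)
        # Order matters: retinanet_postprocess slices [:5], [5:10] and [10:].
        return features + pred_logits + pred_anchor_deltas

The flat list returned by FlatRetinaNet(torch_model)(images.tensor) can then be passed directly to retinanet_postprocess(torch_model, images, results).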
Example #9
def permute_all_cls_and_box_to_N_HWA_K_and_concat(pred_logits, pred_anchor_deltas, num_classes=80):
    """
    Rearrange the tensor layout from the network output, i.e.:
    list[Tensor]: #lvl tensors of shape (N, A x K, Hi, Wi)
    to per-image predictions, i.e.:
    Tensor: of shape (N x sum(Hi x Wi x A), K)
    """
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels.
    pred_logits_flattened = [permute_to_N_HWA_K(x, num_classes) for x in pred_logits]
    pred_anchor_deltas_flattened = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas]
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    pred_logits = cat(pred_logits_flattened, dim=1).view(-1, num_classes)
    pred_anchor_deltas = cat(pred_anchor_deltas_flattened, dim=1).view(-1, 4)
    return pred_logits, pred_anchor_deltas
Example #10
    def inference(self, pred_logits, pred_deltas, pred_masks, anchors, indexes,
                  images):
        """
        Arguments:
            pred_logits, pred_deltas, pred_masks: Same as the output of
                :meth:`TensorMaskHead.forward`
            anchors, indexes: Same as the input of :meth:`TensorMask.get_ground_truth`
            images (ImageList): the input images

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        assert len(anchors) == len(images)
        results = []

        pred_logits = [
            permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits
        ]
        pred_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_deltas]

        pred_logits = cat(pred_logits, dim=1)
        pred_deltas = cat(pred_deltas, dim=1)

        for img_idx, (anchors_im,
                      indexes_im) in enumerate(zip(anchors, indexes)):
            # Get the size of the current image
            image_size = images.image_sizes[img_idx]

            logits_im = pred_logits[img_idx]
            deltas_im = pred_deltas[img_idx]

            if self.mask_on:
                masks_im = [[mla[img_idx] for mla in ml] for ml in pred_masks]
            else:
                masks_im = [None] * self.num_levels
            results_im = self.inference_single_image(
                logits_im,
                deltas_im,
                masks_im,
                Boxes.cat(anchors_im),
                cat(indexes_im),
                tuple(image_size),
            )
            results.append(results_im)
        return results
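
The Boxes.cat call above merges the per-level anchors of one image into a single Boxes object before inference_single_image runs. A tiny sketch, assuming detectron2's Boxes structure (the coordinates are dummy values):

import torch
from detectron2.structures import Boxes

anchors_level0 = Boxes(torch.rand(3, 4))       # 3 anchors on one level (dummy coordinates)
anchors_level1 = Boxes(torch.rand(2, 4))       # 2 anchors on another level
anchors_im = [anchors_level0, anchors_level1]

all_anchors = Boxes.cat(anchors_im)            # one Boxes object holding all 5 anchors
assert len(all_anchors) == 5
assert all_anchors.tensor.shape == (5, 4)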
Example #11
    def forward(
        self, batched_inputs: List[dict]
    ) -> Union[Dict[str, Any], List[Dict[str, Instances]]]:
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            dict[str: Tensor]:  Mapping from a named loss to a tensor storing the loss.
                                    Used during training only.
        """
        images: ImageList = self.preprocess_image(batched_inputs)

        features_dict: Dict[str, torch.Tensor] = self.fpn(images.tensor)
        features: List[torch.Tensor] = [
            features_dict[f] for f in self.in_features
        ]
        pred_logits, pred_anchor_deltas = self.head(features)
        anchors: List[Boxes] = self.anchor_generator(features)

        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [
            permute_to_N_HWA_K(x, K=self.num_classes) for x in pred_logits
        ]

        pred_anchor_deltas = [
            permute_to_N_HWA_K(x, K=4) for x in pred_anchor_deltas
        ]

        if self.training:
            gt_instances: List[Instances] = [
                x['instances'].to(self.device) for x in batched_inputs
            ]
            gt_classes, gt_boxes = self.get_ground_truth(
                anchors=anchors, gt_instances=gt_instances)
            losses: Dict[str, torch.Tensor] = self.losses(
                anchors=anchors,
                pred_logits=pred_logits,
                gt_classes=gt_classes,
                pred_anchor_deltas=pred_anchor_deltas,
                gt_boxes=gt_boxes)

            return losses

        # Otherwise, do inference.
        results: List[Instances] = self.inference(
            anchors=anchors,
            pred_logits=pred_logits,
            pred_anchor_deltas=pred_anchor_deltas,
            image_sizes=images.image_sizes)

        processed_results: List[Dict[str, Any]] = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            processed_results.append({
                "instances":
                detector_postprocess(results_per_image, height, width)
            })
        return processed_results
Example #12
    def forward(
            self,
            batched_inputs,
            return_anchorwise_output=False,
            num_mc_dropout_runs=-1):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

            return_anchorwise_output (bool): returns raw output for probabilistic inference

            num_mc_dropout_runs (int): if greater than 1, perform efficient Monte-Carlo dropout by
                re-running only the head (not the full network) this many times.

        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        # Preprocess image
        images = self.preprocess_image(batched_inputs)

        # Extract features and generate anchors
        features = self.backbone(images.tensor)
        features = [features[f] for f in self.head_in_features]
        anchors = self.anchor_generator(features)

        # MC-Dropout inference forward: replicate the per-level anchor and
        # feature lists so that the head is applied num_mc_dropout_runs times.
        if num_mc_dropout_runs > 1:
            anchors = anchors * num_mc_dropout_runs
            features = features * num_mc_dropout_runs
            output_dict = self.produce_raw_output(anchors, features)
            return output_dict

        # Regular inference forward
        if return_anchorwise_output:
            return self.produce_raw_output(anchors, features)

        # Training and validation forward
        pred_logits, pred_anchor_deltas, pred_logits_vars, pred_anchor_deltas_vars = self.head(
            features)
        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [
            permute_to_N_HWA_K(
                x, self.num_classes) for x in pred_logits]
        pred_anchor_deltas = [
            permute_to_N_HWA_K(
                x, 4) for x in pred_anchor_deltas]

        if pred_logits_vars is not None:
            pred_logits_vars = [
                permute_to_N_HWA_K(
                    x, self.num_classes) for x in pred_logits_vars]
        if pred_anchor_deltas_vars is not None:
            pred_anchor_deltas_vars = [permute_to_N_HWA_K(
                x, self.bbox_cov_dims) for x in pred_anchor_deltas_vars]

        if self.training:
            assert "instances" in batched_inputs[0], "Instance annotations are missing in training!"
            gt_instances = [
                x["instances"].to(
                    self.device) for x in batched_inputs]

            gt_classes, gt_boxes = self.label_anchors(
                anchors, gt_instances)

            self.anchors = torch.cat(
                [Boxes.cat(anchors).tensor for i in range(len(gt_instances))], 0)

            # Loss is computed based on what values are to be estimated by the neural
            # network
            losses = self.losses(
                anchors,
                gt_classes,
                gt_boxes,
                pred_logits,
                pred_anchor_deltas,
                pred_logits_vars,
                pred_anchor_deltas_vars)

            self.current_step += 1

            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    results = self.inference(
                        anchors, pred_logits, pred_anchor_deltas, images.image_sizes)
                    self.visualize_training(batched_inputs, results)
            return losses
        else:
            results = self.inference(
                anchors,
                pred_logits,
                pred_anchor_deltas,
                images.image_sizes)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes
            ):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
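
A short usage sketch for the probabilistic forward() above. The model and batched_inputs variables are placeholders, and MC-dropout additionally assumes that the dropout layers in the head are kept active at inference time.

import torch

model.eval()                                   # assumed: an instance of the probabilistic detector above
batched_inputs = [{"image": torch.rand(3, 480, 640) * 255}]

with torch.no_grad():
    # Raw pre-NMS output: box_cls, box_delta and their variances per feature level.
    raw = model(batched_inputs, return_anchorwise_output=True)

    # MC-dropout: anchors and features are replicated and only the head is
    # re-run, so the per-level lists hold one entry per (level, run) pair.
    raw_mc = model(batched_inputs, num_mc_dropout_runs=10)
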
    def forward(self, batched_inputs: List[dict]) -> Union[Dict[str, Any],
                                                           List[Dict[str, Instances]]]:
        """
        Args:
            batched_inputs (List[dict]):
                A list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            Dict[str, Tensor]:  Mapping from a named loss to a scalar tensor storing the loss.
                                    Used during training only. The dict keys are:
                                    'panel_loss_cls', 'panel_loss_box_reg', 'label_loss_cls'
                                    and 'label_loss_box_reg'.
        """
        images: ImageList = self.preprocess_image(batched_inputs)

        # detected panels
        panel_features_dict: Dict[str, torch.Tensor] = self.panel_fpn(images.tensor)
        panel_features: List[torch.Tensor] = [panel_features_dict[f] for f in self.panel_in_features]
        panel_anchors: List[Boxes] = self.panel_anchor_generator(panel_features)
        panel_pred_logits, panel_pred_anchor_deltas = self.panel_head(panel_features)
        # Transpose the Hi*Wi*A dimension to the middle:
        panel_pred_logits = [permute_to_N_HWA_K(x, K=1)
                             for x in panel_pred_logits]

        panel_pred_anchor_deltas = [permute_to_N_HWA_K(x, K=4)
                                    for x in panel_pred_anchor_deltas]

        # detected labels
        label_features_dict: Dict[str, torch.Tensor] = self.label_fpn(images.tensor)
        label_features: List[torch.Tensor] = [label_features_dict[f]
                                              for f in self.label_in_features]
        label_anchors: List[Boxes] = self.label_anchor_generator(label_features)
        label_pred_logits, label_pred_anchor_deltas = self.label_head(label_features)
        # Transpose the Hi*Wi*A dimension to the middle:
        label_pred_logits = [permute_to_N_HWA_K(x, K=self.num_label_classes)
                             for x in label_pred_logits]

        label_pred_anchor_deltas = [permute_to_N_HWA_K(x, K=4)
                                    for x in label_pred_anchor_deltas]

        # Training
        if self.training:

            # Panels
            panel_gt_instances: List[Instances] = [x['panel_instances'].to(self.device)
                                                   for x in batched_inputs]

            panel_gt_classes, panel_gt_boxes = self.get_ground_truth(
                anchors=panel_anchors,
                gt_instances=panel_gt_instances,
                num_classes=1)

            panel_loss_cls, panel_loss_box_reg = self._compute_single_head_losses(
                anchors=panel_anchors,
                pred_logits=panel_pred_logits,
                gt_classes=panel_gt_classes,
                pred_anchor_deltas=panel_pred_anchor_deltas,
                gt_boxes=panel_gt_boxes,
                num_classes=1)


            loss_dict: Dict[str, torch.Tensor] = {
                'panel_loss_cls': panel_loss_cls,
                'panel_loss_box_reg': panel_loss_box_reg
            }

            # Labels
            label_gt_instances: List[Instances] = [x['label_instances'].to(self.device)
                                                   for x in batched_inputs]

            label_gt_classes, label_gt_boxes = self.get_ground_truth(
                anchors=label_anchors,
                gt_instances=label_gt_instances,
                num_classes=self.num_label_classes)

            label_loss_cls, label_loss_box_reg = self._compute_single_head_losses(
                anchors=label_anchors,
                pred_logits=label_pred_logits,
                gt_classes=label_gt_classes,
                pred_anchor_deltas=label_pred_anchor_deltas,
                gt_boxes=label_gt_boxes,
                num_classes=self.num_label_classes)

            loss_dict['label_loss_cls'] = label_loss_cls
            loss_dict['label_loss_box_reg'] = label_loss_box_reg

            return loss_dict

        # Otherwise, do inference.
        batched_inference_results = self.inference(
            panel_anchors=panel_anchors,
            panel_pred_logits=panel_pred_logits,
            panel_pred_anchor_deltas=panel_pred_anchor_deltas,
            label_anchors=label_anchors,
            label_pred_logits=label_pred_logits,
            label_pred_anchor_deltas=label_pred_anchor_deltas,
            image_sizes=images.image_sizes)

        processed_results: List[Dict[str, Instances]] = []

        for inference_results, input_per_image, image_size in zip(
                batched_inference_results,
                batched_inputs,
                images.image_sizes):

            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            # TODO check that this works with two sets of boxes
            # r = detector_postprocess(results_per_image, height, width)

            panel_results, label_results = inference_results

            scale_x, scale_y = (width / panel_results.image_size[1],
                                height / panel_results.image_size[0])

            # 1) Panels
            panel_results = Instances((height,
                                       width),
                                      **panel_results.get_fields())

            # Clip and scale boxes
            panel_output_boxes = panel_results.pred_boxes
            panel_output_boxes.scale(scale_x, scale_y)
            panel_output_boxes.clip(panel_results.image_size)

            # 2) Labels
            label_results = Instances((height,
                                       width),
                                      **label_results.get_fields())

            # Clip and scale boxes
            label_output_boxes = label_results.pred_boxes
            label_output_boxes.scale(scale_x, scale_y)
            label_output_boxes.clip(label_results.image_size)

            processed_results.append({"panels": panel_results, "labels": label_results})

        return processed_results
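
The scale-and-clip logic at the end of forward() can be factored into a small standalone helper. A sketch assuming detectron2's Instances and Boxes APIs; the function name is ours, not from the original code.

from detectron2.structures import Instances

def rescale_instances(results: Instances, height: int, width: int) -> Instances:
    """Rescale predictions from the resized image_size to (height, width) and clip the boxes."""
    # Scale factors are computed against the size the predictions were made at.
    scale_x = width / results.image_size[1]
    scale_y = height / results.image_size[0]
    out = Instances((height, width), **results.get_fields())
    out.pred_boxes.scale(scale_x, scale_y)
    out.pred_boxes.clip(out.image_size)
    return out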