示例#1
0
 def postprocess_detections(
         self, logits: torch.Tensor,
         proposals: List[torch.Tensor]) -> Tuple[TensorList, TensorList]:
     """Run keypoint inference and return its two outputs as plain lists.

     Calls ``keypointrcnn_inference(logits, proposals)`` and repackages the
     two returned sequences into freshly built lists.

     Args:
         logits: keypoint logits handed straight to ``keypointrcnn_inference``.
         proposals: per-image proposal boxes handed to the same call.

     Returns:
         A ``(keypoints, scores)`` pair of lists, in the order produced by
         ``keypointrcnn_inference``.
     """
     per_image_probs, per_image_scores = keypointrcnn_inference(logits, proposals)
     # Pair the outputs once; zip stops at the shorter of the two sequences,
     # so both returned lists always have the same length.
     paired = list(zip(per_image_probs, per_image_scores))
     keypoints = [prob for prob, _ in paired]
     scores = [score for _, score in paired]
     return keypoints, scores
    def get_joints(self, features, keypoint_proposals, image_shapes):
        """Run the keypoint branch of ``self.roi_heads`` on the given proposals.

        The proposals are pooled with ``keypoint_roi_pool``, passed through
        ``keypoint_head`` and ``keypoint_predictor``, and the resulting logits
        are decoded by ``keypointrcnn_inference``.

        Args:
            features: feature maps forwarded to the RoI pooler.
            keypoint_proposals: per-image boxes to predict keypoints for.
            image_shapes: image sizes forwarded to the RoI pooler.

        Returns:
            The ``(keypoints_probs, kp_scores)`` pair produced by
            ``keypointrcnn_inference``.
        """
        heads = self.roi_heads
        pooled = heads.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
        hidden = heads.keypoint_head(pooled)
        logits = heads.keypoint_predictor(hidden)

        assert logits is not None
        assert keypoint_proposals is not None

        probs, scores = keypointrcnn_inference(logits, keypoint_proposals)
        return probs, scores
示例#3
0
    def forward(self, features, proposals, image_shapes, targets=None):
        """
        Run the box head plus every enabled optional head (mask, keypoint,
        pose and, nested under pose, translation).

        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])

        Returns:
            ``(result, losses)``. In training mode ``result`` stays empty and
            ``losses`` holds ``loss_classifier`` / ``loss_box_reg`` plus one
            entry per enabled head. Otherwise ``losses`` stays empty and
            ``result`` holds one dict per image with ``boxes``, ``labels``,
            ``scores`` and the outputs of the enabled heads.
        """

        def _positive_samples(sampled_proposals, sampled_labels, sampled_matched_idxs):
            # Per image: indices of proposals assigned a positive label, the
            # boxes (xmin, ymin, xmax, ymax) at those indices, and the target
            # index each of those proposals was matched to.
            kept_boxes, kept_matched = [], []
            for img_id, img_proposals in enumerate(sampled_proposals):
                keep = torch.nonzero(sampled_labels[img_id] > 0).squeeze(1)
                kept_boxes.append(img_proposals[keep])
                kept_matched.append(sampled_matched_idxs[img_id][keep])
            return kept_boxes, kept_matched

        if targets is not None:
            for target in targets:
                assert target["boxes"].dtype.is_floating_point, 'target boxes must of float type'
                assert target["labels"].dtype == torch.int64, 'target labels must of int64 type'
                assert target["poses"].dtype.is_floating_point
                if self.has_keypoint:
                    assert target["keypoints"].dtype == torch.float32, 'target keypoints must of float type'

        is_training = self.training

        if is_training:
            (proposals, matched_idxs, labels,
             regression_targets) = self.select_training_samples(proposals, targets)

        # Shape notes carried over from the original comments:
        # pooled -> [bs*1000, 256, 7, 7], head output -> [bs*1000, 1024],
        # class logits -> [bs*1000, 2], box regression -> [bs*1000, 8].
        pooled_boxes = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(pooled_boxes)
        class_logits, box_regression = self.box_predictor(box_features)

        # result is a list of dicts, one per image, holding that image's predictions
        result, losses = [], {}
        if is_training:
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg,
            }
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            result = [
                {"boxes": boxes[i], "labels": labels[i], "scores": scores[i]}
                for i in range(len(boxes))
            ]

        # Mask R-CNN branch
        if self.has_mask:
            if is_training:
                # during training, only focus on positive boxes
                mask_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
            else:
                mask_proposals = [det["boxes"] for det in result]

            pooled_masks = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_hidden = self.mask_head(pooled_masks)
            mask_logits = self.mask_predictor(mask_hidden)

            if is_training:
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                losses["loss_mask"] = maskrcnn_loss(
                    mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
            else:
                detected_labels = [det["labels"] for det in result]
                masks_probs = maskrcnn_inference(mask_logits, detected_labels)
                for mask_prob, det in zip(masks_probs, result):
                    det["masks"] = mask_prob

        # Keypoint branch
        if self.has_keypoint:
            if is_training:
                # during training, only focus on positive boxes
                keypoint_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
            else:
                keypoint_proposals = [det["boxes"] for det in result]

            pooled_keypoints = self.keypoint_roi_pool(
                features, keypoint_proposals, image_shapes)
            keypoint_hidden = self.keypoint_head(pooled_keypoints)
            keypoint_logits = self.keypoint_predictor(keypoint_hidden)

            if is_training:
                gt_keypoints = [t["keypoints"] for t in targets]
                losses["loss_keypoint"] = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs)
            else:
                keypoints_probs, kp_scores = keypointrcnn_inference(
                    keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, det in zip(keypoints_probs, kp_scores, result):
                    det["keypoints"] = keypoint_prob
                    det["keypoints_scores"] = kps

        # Pose branch
        if self.has_pose:
            if is_training:
                # during training, only focus on positive boxes
                pose_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
            else:
                pose_proposals = [det["boxes"] for det in result]

            pooled_poses = self.pose_roi_pool(features, pose_proposals, image_shapes)
            # pose_features is also consumed by the translation predictor below.
            pose_features = self.pose_head(pooled_poses)
            pose_regression = self.pose_predictor(pose_features)

            if is_training:
                # a list of (rx, ry, rz, tz)
                gt_poses = [t["poses"] for t in targets]
                losses["loss_pose"] = posercnn_loss(
                    pose_regression, gt_poses, labels, pos_matched_idxs)
            else:
                pred_poses = postprocess_poses(pose_regression, pose_proposals)
                for poses, det in zip(pred_poses, result):
                    det['poses'] = poses

            # The translation branch is only evaluated together with the pose
            # branch because it reuses pose_features.
            if self.has_trans:
                if is_training:
                    # during training, only focus on positive boxes
                    trans_proposals, pos_matched_idxs = _positive_samples(
                        proposals, labels, matched_idxs)
                else:
                    trans_proposals = [det["boxes"] for det in result]

                # [N, 4]    N=batch_size*num_proposal_per_image
                stacked_boxes = torch.cat(trans_proposals, dim=0)
                trans_features = self.translation_head(stacked_boxes)
                trans_pred = self.translation_predictor(trans_features, pose_features)

                if is_training:
                    gt_trans = [t["translations"] for t in targets]
                    # The translation loss weight in 6DVNET is 0.05
                    losses["loss_trans"] = 0.05 * trans_loss(
                        trans_pred, gt_trans, labels, pos_matched_idxs)
                else:
                    pred_trans = postprocess_trans(trans_pred, trans_proposals)
                    for translations, det in zip(pred_trans, result):
                        det['translations'] = translations

        return result, losses
    def forward(self, features, proposals, image_shapes, targets=None, return_loss=False):
        """
        Run the box head and the optional mask / keypoint heads.

        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
            return_loss (Bool): return the loss (even if we are in eval mode)

        Returns:
            ``(result, losses)``. When the module is training or ``return_loss``
            is truthy, ``losses`` is filled and ``result`` stays empty;
            otherwise ``result`` holds one detection dict per image and
            ``losses`` stays empty.
        """

        def _positive_samples(sampled_proposals, sampled_labels, sampled_matched_idxs):
            # Per image, keep the proposals whose sampled label is > 0 together
            # with the matched-target index of each kept proposal.
            kept_boxes, kept_matched = [], []
            for img_id, img_proposals in enumerate(sampled_proposals):
                keep = torch.nonzero(sampled_labels[img_id] > 0).squeeze(1)
                kept_boxes.append(img_proposals[keep])
                kept_matched.append(sampled_matched_idxs[img_id][keep])
            return kept_boxes, kept_matched

        if targets is not None:
            for target in targets:
                assert target["boxes"].dtype.is_floating_point, 'target boxes must of float type'
                assert target["labels"].dtype == torch.int64, 'target labels must of int64 type'
                # The keypoint dtype check is currently disabled:
                # if self.has_keypoint:
                #     assert t["keypoints"].dtype == torch.float32, 'target keypoints must of float type'

        # The loss path is taken in training mode, or on request via return_loss.
        want_loss = self.training or return_loss

        if want_loss:
            (proposals, matched_idxs, labels,
             regression_targets) = self.select_training_samples(proposals, targets)

        pooled_boxes = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(pooled_boxes)
        class_logits, box_regression = self.box_predictor(box_features)

        result, losses = [], {}
        if want_loss:
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg,
            }
        else:
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            result = [
                {"boxes": boxes[i], "labels": labels[i], "scores": scores[i]}
                for i in range(len(boxes))
            ]

        if self.has_mask:
            if want_loss:
                # during training, only focus on positive boxes
                mask_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
            else:
                mask_proposals = [det["boxes"] for det in result]

            pooled_masks = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_hidden = self.mask_head(pooled_masks)
            mask_logits = self.mask_predictor(mask_hidden)

            if want_loss:
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                losses["loss_mask"] = maskrcnn_loss(
                    mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
            else:
                detected_labels = [det["labels"] for det in result]
                masks_probs = maskrcnn_inference(mask_logits, detected_labels)
                for mask_prob, det in zip(masks_probs, result):
                    det["masks"] = mask_prob

        if self.has_keypoint:
            if want_loss:
                # during training, only focus on positive boxes
                keypoint_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
            else:
                keypoint_proposals = [det["boxes"] for det in result]

            pooled_keypoints = self.keypoint_roi_pool(
                features, keypoint_proposals, image_shapes)
            keypoint_hidden = self.keypoint_head(pooled_keypoints)
            keypoint_logits = self.keypoint_predictor(keypoint_hidden)

            if want_loss:
                gt_keypoints = [t["keypoints"] for t in targets]
                losses["loss_keypoint"] = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs)
            else:
                keypoints_probs, kp_scores = keypointrcnn_inference(
                    keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, det in zip(keypoints_probs, kp_scores, result):
                    det["keypoints"] = keypoint_prob
                    det["keypoints_scores"] = kps

        return result, losses
示例#5
0
    def forward(self, features, proposals, image_shapes, targets=None):
        """
        Run the RoI heads, returning losses in training mode and detections
        in evaluation mode (or both, see below).

        Detections are produced when the module is not training, or when the
        instance carries a truthy ``_eval_when_train`` attribute. In the latter
        case a training step returns both losses and detections. The training
        labels and the detection labels are kept in separate variables, and the
        mask / keypoint heads are run once on the sampled positive proposals
        (for the loss) and once on the detected boxes (for inference), so that
        neither path consumes tensors belonging to the other.

        Args:
          features: List
          proposals: List
          image_shapes: List
          targets: List (Default value = None)

        Returns:
          Tuple ``(result, losses)``: ``result`` is a list with one dict per
          image (``boxes``, ``labels``, ``scores`` and, when the heads are
          enabled, ``masks``, ``keypoints``, ``keypoints_scores``); it is empty
          when no detections are requested. ``losses`` is a dict that is only
          filled in training mode.
        """

        def _positive_samples(sampled_proposals, sampled_labels, sampled_matched_idxs):
            # Per image, keep the proposals whose sampled label is > 0 together
            # with the matched-target index of each kept proposal.
            kept_boxes, kept_matched = [], []
            for img_id, img_proposals in enumerate(sampled_proposals):
                pos = torch.nonzero(sampled_labels[img_id] > 0).squeeze(1)
                kept_boxes.append(img_proposals[pos])
                kept_matched.append(sampled_matched_idxs[img_id][pos])
            return kept_boxes, kept_matched

        def _mask_logits(boxes):
            # Pool -> head -> predictor for the mask branch.
            pooled = self.mask_roi_pool(features, boxes, image_shapes)
            return self.mask_predictor(self.mask_head(pooled))

        def _keypoint_logits(boxes):
            # Pool -> head -> predictor for the keypoint branch.
            pooled = self.keypoint_roi_pool(features, boxes, image_shapes)
            return self.keypoint_predictor(self.keypoint_head(pooled))

        # Customized loss functions take precedence over the defaults.
        maskrcnn_loss_func = maskrcnn_loss
        fastrcnn_loss_func = fastrcnn_loss
        keypointrcnn_loss_func = keypointrcnn_loss
        if self.maskrcnn_loss_customized is not None:
            maskrcnn_loss_func = self.maskrcnn_loss_customized
        if self.fastrcnn_loss_customized is not None:
            fastrcnn_loss_func = self.fastrcnn_loss_customized
        if self.keypointrcnn_loss_customized is not None:
            keypointrcnn_loss_func = self.keypointrcnn_loss_customized

        # `_eval_when_train` is optional; a missing attribute means "off".
        eval_when_train = (not self.training) or bool(
            getattr(self, "_eval_when_train", False))

        if self.training:
            (
                proposals,
                matched_idxs,
                labels,
                regression_targets,
            ) = self.select_training_samples(proposals, targets)

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result, losses = [], {}
        if self.training:
            loss_classifier, loss_box_reg = fastrcnn_loss_func(
                class_logits, box_regression, labels, regression_targets)
            losses = dict(loss_classifier=loss_classifier,
                          loss_box_reg=loss_box_reg)
        if eval_when_train:
            # Detection labels get their own name: the sampled training
            # `labels` are still needed below to select positive proposals.
            boxes, scores, det_labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            for i in range(len(boxes)):
                result.append(
                    dict(
                        boxes=boxes[i],
                        labels=det_labels[i],
                        scores=scores[i],
                    ))

        # NOTE(review): `has_mask` is read as an attribute here while
        # `has_keypoint` is called as a method below; this mirrors the original
        # code - confirm against the enclosing class.
        if self.has_mask:
            loss_mask = {}
            if self.training:
                # during training, only focus on positive boxes
                mask_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
                mask_logits = _mask_logits(mask_proposals)

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                loss_mask = maskrcnn_loss_func(mask_logits, mask_proposals,
                                               gt_masks, gt_labels,
                                               pos_matched_idxs)
                loss_mask = dict(loss_mask=loss_mask)
            if eval_when_train:
                # Inference must use logits computed for the detected boxes,
                # not the ones computed for the training positives.
                det_boxes = [r["boxes"] for r in result]
                det_mask_logits = _mask_logits(det_boxes)
                det_mask_labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(det_mask_logits, det_mask_labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        if self.has_keypoint():
            loss_keypoint = {}
            if self.training:
                # during training, only focus on positive boxes
                keypoint_proposals, pos_matched_idxs = _positive_samples(
                    proposals, labels, matched_idxs)
                keypoint_logits = _keypoint_logits(keypoint_proposals)

                gt_keypoints = [t["keypoints"] for t in targets]
                loss_keypoint = keypointrcnn_loss_func(keypoint_logits,
                                                       keypoint_proposals,
                                                       gt_keypoints,
                                                       pos_matched_idxs)
                loss_keypoint = dict(loss_keypoint=loss_keypoint)
            if eval_when_train:
                # Same separation as for masks: decode logits of detected boxes.
                det_boxes = [r["boxes"] for r in result]
                det_keypoint_logits = _keypoint_logits(det_boxes)
                keypoints_probs, kp_scores = keypointrcnn_inference(
                    det_keypoint_logits, det_boxes)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores,
                                                 result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps

            losses.update(loss_keypoint)

        return result, losses
示例#6
0
    def forward(self, features, proposals, image_shapes, targets=None):
        # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
        """
        Run the box head (fed with a position embedding of the proposals) and
        the optional mask / keypoint heads.

        Arguments:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])

        Returns:
            ``(result, losses)``. In training mode ``result`` stays empty and
            ``losses`` holds ``loss_classifier``, ``loss_box_reg`` and, when the
            respective heads are enabled, ``loss_mask`` / ``loss_keypoint``.
            Otherwise ``losses`` stays empty and ``result`` holds one dict per
            image with ``boxes``, ``labels``, ``scores`` and, when enabled,
            ``masks``, ``keypoints``, ``keypoints_scores``.
        """
        # Validate target dtypes up front so later loss code can rely on them.
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                assert t[
                    "boxes"].dtype in floating_point_types, 'target boxes must of float type'
                assert t[
                    "labels"].dtype == torch.int64, 'target labels must of int64 type'
                if self.has_keypoint():
                    assert t[
                        "keypoints"].dtype == torch.float32, 'target keypoints must of float type'

        # In training the proposals are replaced by the sampled ones; in eval
        # the training-only variables are explicitly set to None so they are
        # defined on both paths.
        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(
                proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        # compute bbox embedding
        # NOTE(review): `Nongt_dim`, `extract_position_matrix` and
        # `extract_position_embedding` are not defined in this function;
        # presumably module-level names - confirm.
        position_matrix = extract_position_matrix(proposals, Nongt_dim)
        position_embedding = extract_position_embedding(position_matrix,
                                                        feat_dim=64)

        # Unlike the mask / keypoint heads, the box head also receives the
        # position embedding computed above.
        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features, position_embedding)
        class_logits, box_regression = self.box_predictor(box_features)

        # Explicitly typed empty list for TorchScript.
        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
        losses = {}
        if self.training:
            assert labels is not None and regression_targets is not None
            loss_classifier, loss_box_reg = fastrcnn_loss(
                class_logits, box_regression, labels, regression_targets)
            losses = {
                "loss_classifier": loss_classifier,
                "loss_box_reg": loss_box_reg
            }
        else:
            # Eval: `labels` now holds the labels returned by
            # postprocess_detections; one result dict is built per image.
            boxes, scores, labels = self.postprocess_detections(
                class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append({
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                })

        if self.has_mask():
            # Default to the detected boxes; `result` is empty in training, and
            # the list is rebuilt from the sampled proposals just below.
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                assert matched_idxs is not None
                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    # Indices of proposals whose sampled label is > 0.
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals,
                                                   image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                # mask_logits is assigned before raising, presumably so that it
                # is defined on both branches for TorchScript - confirm.
                mask_logits = torch.tensor(0)
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                assert targets is not None
                assert pos_matched_idxs is not None
                assert mask_logits is not None

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals,
                                               gt_masks, gt_labels,
                                               pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                # Eval: decode masks with the detected labels and attach one
                # entry per image to the matching result dict.
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            # No-op in eval, where loss_mask is still the empty dict.
            losses.update(loss_mask)

        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
                and self.keypoint_predictor is not None:
            # Same proposal selection scheme as in the mask branch above.
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                assert matched_idxs is not None
                for img_id in range(num_images):
                    pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            keypoint_features = self.keypoint_roi_pool(features,
                                                       keypoint_proposals,
                                                       image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                assert targets is not None
                assert pos_matched_idxs is not None

                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(keypoint_logits,
                                                       keypoint_proposals,
                                                       gt_keypoints,
                                                       pos_matched_idxs)
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                assert keypoint_logits is not None
                assert keypoint_proposals is not None

                # Eval: attach decoded keypoints and their scores per image.
                keypoints_probs, kp_scores = keypointrcnn_inference(
                    keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores,
                                                 result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps

            # No-op in eval, where loss_keypoint is still the empty dict.
            losses.update(loss_keypoint)

        return result, losses
示例#7
0
def roi_heads_forward(self, features, proposals, image_shapes, targets=None):
    """Hack into the torchvision model to obtain features for
    training caption model; training is assumed to be false

    https://github.com/pytorch/vision/blob/master/
        torchvision/models/detection/roi_heads.py

    Besides the usual detection outputs, every per-image result dict also
    carries the features the predictions were computed from
    (``boxes_features``, and ``masks_features`` / ``keypoints_features`` when
    the respective heads are enabled). The mask and keypoint features are
    split along dim 0 so that each image only receives the rows belonging to
    its own boxes; a single-image batch receives the unsplit tensor.

    Args:
        self: the RoI heads module whose sub-modules are used.
        features: feature maps forwarded to the RoI poolers.
        proposals: per-image proposal boxes.
        image_shapes: image sizes forwarded to the RoI poolers.
        targets: unused; kept for signature compatibility.

    Returns:
        ``(result, losses)`` where ``result`` is a list with one dict per
        image and ``losses`` is always an empty dict.

    Raises:
        Exception: if the mask head is enabled but ``mask_roi_pool`` is None.
    """

    def _per_image(batched, per_image_boxes):
        # Split a tensor whose rows follow the concatenated per-image boxes
        # back into one chunk per image. Assumes the pooler returns rows in the
        # order of the concatenated boxes, as torchvision's MultiScaleRoIAlign
        # does. With a single image nothing needs splitting.
        counts = [b.shape[0] for b in per_image_boxes]
        if len(counts) == 1:
            return [batched]
        return batched.split(counts, dim=0)

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)

    result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])

    # The customized post-processing also returns the box features of the
    # kept detections, already grouped per image.
    boxes, scores, labels, box_features = roi_postprocess_detections(
        self, class_logits, box_regression, proposals, image_shapes,
        box_features)
    num_images = len(boxes)
    for i in range(num_images):
        result.append({
            "boxes_features": box_features[i],
            "boxes": boxes[i],
            "labels": labels[i],
            "scores": scores[i]
        })

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]

        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals,
                                               image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)

        else:
            mask_logits = torch.tensor(0)
            raise Exception("Expected mask_roi_pool to be not None")

        labels = [r["labels"] for r in result]
        masks_probs = maskrcnn_inference(mask_logits, labels)
        # Hand each image only the feature rows of its own boxes instead of
        # the whole-batch tensor.
        mask_features_per_image = _per_image(mask_features, mask_proposals)
        for mask_prob, mask_feat, r in zip(masks_probs,
                                           mask_features_per_image, result):
            r["masks_features"] = mask_feat
            r["masks"] = mask_prob

    # keep none checks in if conditional so torchscript will conditionally
    # compile each branch
    if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
            and self.keypoint_predictor is not None:
        keypoint_proposals = [p["boxes"] for p in result]

        keypoint_features = self.keypoint_roi_pool(features,
                                                   keypoint_proposals,
                                                   image_shapes)
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        assert keypoint_logits is not None
        assert keypoint_proposals is not None

        keypoints_probs, kp_scores = keypointrcnn_inference(
            keypoint_logits, keypoint_proposals)
        # Same per-image split as for the mask features.
        keypoint_features_per_image = _per_image(keypoint_features,
                                                 keypoint_proposals)
        for keypoint_prob, kps, kp_feat, r in zip(keypoints_probs, kp_scores,
                                                  keypoint_features_per_image,
                                                  result):
            r["keypoints_features"] = kp_feat
            r["keypoints"] = keypoint_prob
            r["keypoints_scores"] = kps

    return result, dict()