Example #1
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        """
        images, images_whwh = self.preprocess_image(batched_inputs)
        if isinstance(images, (list, torch.Tensor)):
            images = nested_tensor_from_tensor_list(images)

        # Feature Extraction.
        src = self.backbone(images.tensor)
        features = list()
        for f in self.in_features:
            feature = src[f]
            features.append(feature)

        # Cls & Reg Prediction.
        outputs_class, outputs_coord = self.head(features)

        output = {'pred_logits': outputs_class, 'pred_boxes': outputs_coord}

        if self.training:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
            targets = self.prepare_targets(gt_instances)
            loss_dict = self.criterion(output, targets)

            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]

            return loss_dict

        else:
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            results = self.inference(box_cls, box_pred, images.image_sizes)

            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})

            return processed_results
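
A minimal usage sketch (not from the source) of how a detector with this `forward` signature is typically driven; `model` is assumed to be an instance of the class above, and the image shape and resolution values are illustrative.

import torch

# One dict per image, matching the docstring above.
batched_inputs = [{
    "image": torch.rand(3, 480, 640),   # (C, H, W) tensor
    "height": 480,                      # desired output resolution for postprocessing
    "width": 640,
}]

model.eval()                            # take the inference branch of forward()
with torch.no_grad():
    outputs = model(batched_inputs)     # -> [{"instances": Instances}, ...]
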
Example #2
    def forward(
            self,
            batched_inputs,
            return_raw_results=False,
            is_mc_dropout=False):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

            return_raw_results (bool): if True, return unprocessed results for probabilistic inference.
            is_mc_dropout (bool): if True, return unprocessed results even if the self.training flag is on.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        images = self.preprocess_image(batched_inputs)
        output = self.detr(images)

        if self.training and not is_mc_dropout:
            gt_instances = [
                x["instances"].to(
                    self.device) for x in batched_inputs]

            targets = self.prepare_targets(gt_instances)

            loss_dict = self.criterion(output, targets)
            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]

            self.current_step += 1
            return loss_dict
        elif return_raw_results:
            return output
        else:
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            mask_pred = output["pred_masks"] if self.mask_on else None
            results = self.inference(box_cls, box_pred, mask_pred, images.image_sizes)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
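
A hedged sketch of combining the two flags above for Monte-Carlo-dropout style sampling; `model`, `batched_inputs`, and the number of passes are assumptions, not from the source.

import torch

model.train()   # keep dropout stochastic; is_mc_dropout=True skips the loss branch anyway
mc_outputs = []
with torch.no_grad():
    for _ in range(10):   # number of stochastic passes is an arbitrary choice
        out = model(batched_inputs, return_raw_results=True, is_mc_dropout=True)
        mc_outputs.append(out)   # each out carries "pred_logits" / "pred_boxes" tensors
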
Example #3
    def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:
                * image: Tensor, image in (C, H, W) format.
                * instances: Instances
                Other information that's included in the original dicts,
                such as:
                * "height", "width" (int): the output resolution of the model,
                  used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            in training, dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used
                during training only.
            in inference, the standard output format, described in
            :doc:`/tutorials/models`.
        """
        nums_images = len(batched_inputs)
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)
        features = [features[self.backbone_level]]
        anchors_image = self.anchor_generator(features)
        anchors = [copy.deepcopy(anchors_image) for _ in range(nums_images)]
        pred_logits, pred_anchor_deltas = self.decoder(
            self.encoder(features[0]))

        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [permute_to_N_HWA_K(pred_logits, self.nums_classes)]
        pred_anchor_deltas = [permute_to_N_HWA_K(pred_anchor_deltas, 4)]

        if self.training:
            assert not torch.jit.is_scripting(), "Not supported"
            assert "instances" in batched_inputs[
                0], "Instance annotations are missing in training"
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
            indices = self.get_ground_truth(anchors, pred_anchor_deltas,
                                            gt_instances)
            loss = self.losses(indices, gt_instances, anchors, pred_logits,
                               pred_anchor_deltas)
            return loss
        else:
            results = self.inference(anchors_image, pred_logits,
                                     pred_anchor_deltas, images.image_sizes)
            if torch.jit.is_scripting():
                return results
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
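
For reference, a sketch of `permute_to_N_HWA_K` consistent with the detectron2 dense-detector convention used above; the implementation in this code base may differ in details.

import torch

def permute_to_N_HWA_K(tensor, K):
    """Reshape a (N, A*K, H, W) prediction map to (N, H*W*A, K)."""
    assert tensor.dim() == 4, tensor.shape
    N, _, H, W = tensor.shape
    tensor = tensor.view(N, -1, K, H, W)      # (N, A, K, H, W)
    tensor = tensor.permute(0, 3, 4, 1, 2)    # (N, H, W, A, K)
    return tensor.reshape(N, -1, K)           # (N, H*W*A, K)
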
Example #4
    def inference(self,
                  batched_inputs,
                  detected_instances=None,
                  do_postprocess=True):
        """
        Run inference on the given inputs.

        Args:
            batched_inputs (list[dict]): same as in :meth:`forward`
            detected_instances (None or list[Instances]): if not None, it
                contains an `Instances` object per image. The `Instances`
                object contains "pred_boxes" and "pred_classes" which are
                known boxes in the image.
                The inference will then skip the detection of bounding boxes,
                and only predict other per-ROI outputs.
            do_postprocess (bool): whether to apply post-processing on the outputs.

        Returns:
            same as in :meth:`forward`.
        """
        assert not self.training

        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        if detected_instances is None:
            if self.proposal_generator:
                proposals, _ = self.proposal_generator(images, features, None)
            else:
                assert "proposals" in batched_inputs[0]
                proposals = [
                    x["proposals"].to(self.device) for x in batched_inputs
                ]

            results, _ = self.roi_heads(images, features, proposals, None)
        else:
            detected_instances = [
                x.to(self.device) for x in detected_instances
            ]
            results = self.roi_heads.forward_with_given_boxes(
                features, detected_instances)

        if do_postprocess:
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
        else:
            return results
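
A usage sketch (assumed, not from the source) of the two modes this `inference` method supports: full detection, and per-ROI prediction on externally supplied boxes.

import torch
from detectron2.structures import Boxes, Instances

# Mode 1: full detection, postprocessed to the original resolution.
results = model.inference(batched_inputs)

# Mode 2: skip box detection and only run the per-ROI heads on known boxes.
known = Instances(image_size=(480, 640))                    # size the model saw
known.pred_boxes = Boxes(torch.tensor([[10., 20., 200., 240.]]))
known.pred_classes = torch.tensor([0])
roi_outputs = model.inference(
    batched_inputs, detected_instances=[known], do_postprocess=False)
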
Example #5
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        images_lists = self.preprocess_image(batched_inputs)
        # Convert the ImageList to a NestedTensor expected by the DETR model.
        nested_images = self.imagelist_to_nestedtensor(images_lists)
        output = self.detr(nested_images)

        if self.training:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]

            # targets: List[Dict[str, torch.Tensor]]. Keys
            # "labels": [NUM_BOX,]
            # "boxes": [NUM_BOX, 4]
            targets = self.prepare_targets(gt_instances)
            loss_dict = self.criterion(output, targets)
            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]
            return loss_dict
        else:
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            mask_pred = output["pred_masks"] if self.mask_on else None
            results = self.inference(box_cls, box_pred, mask_pred,
                                     images_lists.image_sizes)
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images_lists.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
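
A hedged sketch of the target format described by the inline comment above ("labels": [NUM_BOX], "boxes": [NUM_BOX, 4]); the normalized cxcywh convention follows the DETR reference code and is an assumption about this repository's `prepare_targets`.

import torch

def prepare_targets_sketch(gt_instances):
    targets = []
    for inst in gt_instances:
        h, w = inst.image_size
        scale = torch.as_tensor([w, h, w, h], dtype=torch.float32, device=inst.gt_boxes.device)
        xyxy = inst.gt_boxes.tensor / scale              # normalize to [0, 1]
        cxcy = (xyxy[:, :2] + xyxy[:, 2:]) / 2           # box centers
        wh = xyxy[:, 2:] - xyxy[:, :2]                   # box widths / heights
        targets.append({
            "labels": inst.gt_classes,                   # [NUM_BOX]
            "boxes": torch.cat([cxcy, wh], dim=1),       # [NUM_BOX, 4] in cxcywh
        })
    return targets
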
Example #6
 def _postprocess(instances, batched_inputs, image_sizes):
     """
     Rescale the output instances to the target size.
     """
     # note: private function; subject to changes
     processed_results = []
     for results_per_image, input_per_image, image_size in zip(
             instances, batched_inputs, image_sizes):
         height = input_per_image.get("height", image_size[0])
         width = input_per_image.get("width", image_size[1])
         r = detector_postprocess(results_per_image, height, width)
         processed_results.append({"instances": r})
     return processed_results
Example #7
    def inference(self, batched_inputs, do_postprocess=True):
        images = self.preprocess_image(batched_inputs)
        inp = images.tensor
        features = self.backbone(inp)
        proposals, _ = self.proposal_generator(images, features, None)

        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
            proposals, batched_inputs, images.image_sizes):
            if do_postprocess:
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            else:
                r = results_per_image
                processed_results.append(r)
        return processed_results
Example #8
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        """
        images, images_whwh = self.preprocess_image(batched_inputs)
        if isinstance(images, (list, torch.Tensor)):
            images = nested_tensor_from_tensor_list(images)

        # Feature Extraction.
        src = self.backbone(images.tensor)

        features = list()
        for f in self.in_features:
            feature = src[f]
            features.append(feature)

        # Prepare Proposals.
        proposal_boxes = self.init_proposal_boxes.weight.clone()
        proposal_boxes = box_cxcywh_to_xyxy(proposal_boxes)
        proposal_boxes = proposal_boxes[None] * images_whwh[:, None, :]

        img_feats = self.IFE(features)
        bs = len(features[0])
        pos_embeddings = self.pos_embeddings.weight[None].repeat(bs, 1, 1)
        proposal_feats = img_feats + pos_embeddings

        del img_feats

        if self.training:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
            targets = self.prepare_targets(gt_instances)

            outputs_class, outputs_coord, outputs_mask = self.head(
                features, proposal_boxes, proposal_feats, targets)
            output = {
                'pred_logits': outputs_class[-1],
                'pred_boxes': outputs_coord[-1],
                'pred_masks': outputs_mask[-1]
            }

            if self.deep_supervision:
                output['aux_outputs'] = [{
                    'pred_logits': a,
                    'pred_boxes': b,
                    'pred_masks': c
                } for a, b, c in zip(outputs_class[:-1], outputs_coord[:-1],
                                     outputs_mask[:-1])]

            loss_dict = self.criterion(output, targets, self.mask_encoding)
            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]

            return loss_dict

        else:
            outputs_class, outputs_coord, outputs_mask = self.head(
                features, proposal_boxes, proposal_feats)
            output = {
                'pred_logits': outputs_class[-1],
                'pred_boxes': outputs_coord[-1],
                'pred_masks': outputs_mask[-1]
            }

            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            mask_pred = output["pred_masks"].unsqueeze(dim=2)

            results = self.inference(box_cls, box_pred, mask_pred,
                                     images.image_sizes)

            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})

            return processed_results
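
For context, a sketch of the `box_cxcywh_to_xyxy` conversion used when preparing proposals above, following the DETR convention; multiplying the result by `images_whwh[:, None, :]` turns the normalized boxes into absolute per-image pixel coordinates.

import torch

def box_cxcywh_to_xyxy(x):
    # x: (..., 4) boxes given as (cx, cy, w, h), typically normalized to [0, 1]
    x_c, y_c, w, h = x.unbind(-1)
    return torch.stack([x_c - 0.5 * w, y_c - 0.5 * h,
                        x_c + 0.5 * w, y_c + 0.5 * h], dim=-1)
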
Example #9
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        """
        images, images_whwh = self.preprocess_image(batched_inputs)
        if isinstance(images, (list, torch.Tensor)):
            images = nested_tensor_from_tensor_list(images)

        # Feature Extraction.
        src = self.backbone(images.tensor)
        features = list()
        for f in self.in_features:
            feature = src[f]
            features.append(feature)

        # Prepare Proposals.
        proposal_boxes = self.init_proposal_boxes.weight.clone()
        proposal_boxes = box_cxcywh_to_xyxy(proposal_boxes)
        proposal_boxes = proposal_boxes[None] * images_whwh[:, None, :] # proposal size: absolute size, x1y1, x2y2

        # Prediction.
        outputs_class, outputs_coord, bboxes = self.head(features, proposal_boxes, self.init_proposal_features.weight)

        # TODO #3: mask forward

        output = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1]}

        proposal_list_instances = self.boxes2list_instances(bboxes, images.image_sizes)

        if self.training:
            # TODO #3: mask forward
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
            proposals_gt = self.label_and_sample_proposals(proposal_list_instances, gt_instances)

            instances_fg, _ = select_foreground_proposals(proposals_gt, self.num_classes)
            boxes_fg = [x.proposal_boxes for x in instances_fg]
            mask_features = self.mask_pooler(features, boxes_fg)

            targets = self.prepare_targets(gt_instances)
            if self.deep_supervision:
                output['aux_outputs'] = [{'pred_logits': a, 'pred_boxes': b}
                                         for a, b in zip(outputs_class[:-1], outputs_coord[:-1])]

            loss_dict, match_indices = self.criterion(output, targets)

            # TODO #4: mask loss update
            loss_dict.update(self.mask_head(mask_features, instances_fg))

            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]
            return loss_dict

        else:
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            results = self.inference(box_cls, box_pred, images.image_sizes)
            #TODO #5 mask inference
            boxes_list = []
            for boxes_per_image in bboxes:
                boxes_list.append(Boxes(boxes_per_image))

            mask_features = self.mask_pooler(features, boxes_list)
            self.mask_head(mask_features, results)

            processed_results = []
            for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            
            return processed_results
Example #10
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "instances": Instances
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                each dict is the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`.
                  See the return value of
                  :func:`combine_semantic_and_instance_outputs` for its format.
        """

        image_path = [x['file_name'] for x in batched_inputs]
        if self.training:
            flips = [x['flip'] for x in batched_inputs]
        else:
            flips = None

        if self.training:
            exemplar_input = self.get_exemplar_input(image_path, sample_size=1)
            if exemplar_input is not None:
                l = len(batched_inputs)
                batched_inputs = batched_inputs + exemplar_input
                images, features, proposals, gt_instances, gt_integral_sem_seg, _, losses = self._forward(
                    batched_inputs)
                exemplar_features = {}
                for k, v in features.items():
                    exemplar_features[k] = v[l:]
                exemplar_gt_instances = gt_instances[l:]
                image_path = [x['file_name'] for x in batched_inputs]
                # self.training is necessarily True in this branch.
                exemplar_flips = [x['flip'] for x in batched_inputs]
                with torch.no_grad():
                    exemplar_info = self.roi_heads.get_box_features(
                        exemplar_features, exemplar_gt_instances)
                detector_results, detector_losses = self.roi_heads(
                    images,
                    features,
                    proposals,
                    gt_instances,
                    gt_integral_sem_seg,
                    image_path=image_path,
                    flips=exemplar_flips,
                    exemplar_info=exemplar_info)
                del exemplar_info, exemplar_input
            else:
                exemplar_info = None
                images, features, proposals, gt_instances, gt_integral_sem_seg, _, losses = self._forward(
                    batched_inputs)
                detector_results, detector_losses = self.roi_heads(
                    images,
                    features,
                    proposals,
                    gt_instances,
                    gt_integral_sem_seg,
                    image_path=image_path,
                    flips=flips,
                    exemplar_info=exemplar_info)
        else:
            exemplar_info = None
            images, features, proposals, gt_instances, gt_integral_sem_seg, sem_seg_results, losses = self._forward(
                batched_inputs)
            detector_results, detector_losses = self.roi_heads(
                images,
                features,
                proposals,
                gt_instances,
                gt_integral_sem_seg,
                image_path=image_path,
                flips=flips,
                exemplar_info=exemplar_info)

        if self.training:
            losses.update({
                k: v * self.instance_loss_weight
                for k, v in detector_losses.items()
            })
            return losses

        processed_results = []
        for sem_seg_result, detector_result, input_per_image, image_size in zip(
                sem_seg_results, detector_results, batched_inputs,
                images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height,
                                            width)
            detector_r = detector_postprocess(detector_result, height, width)

            processed_results.append({
                "sem_seg": sem_seg_r,
                "instances": detector_r
            })

            if self.combine_on:
                panoptic_r = combine_semantic_and_instance_outputs(
                    detector_r,
                    sem_seg_r.argmax(dim=0),
                    self.combine_overlap_threshold,
                    self.combine_stuff_area_limit,
                    self.combine_instances_confidence_threshold,
                )
                processed_results[-1]["panoptic_seg"] = panoptic_r
        return processed_results
Example #11
    def postprogress(self, results, batched_inputs, image_sizes):
        processed_results = []
        tumor_mask_root = './test_tumor_whole'
        wall_mask_root = './test_wall_whole'
        pred_tumor_path = './predictionTumor'
        pred_wall_path = './predictionWall'
        if not os.path.exists(pred_tumor_path):
            os.mkdir(pred_tumor_path)
        if not os.path.exists(pred_wall_path):
            os.mkdir(pred_wall_path)

        pred_tumor_masks = []
        pred_wall_masks = []
        gt_wall_masks = []
        gt_tumor_masks = []
        for results_per_image, input_per_image, image_size in zip(results, batched_inputs, image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})

            if self.mask_on:
                img_name = input_per_image['file_name'].split('/')[-1]

                dice_path = 'diceLog.csv'
                readstyle = 'a+'
                if img_name in pd.read_csv(dice_path, usecols=['SubjectID']):
                    readstyle = "w+"

                if r.pred_classes.shape == torch.Size([0]) and \
                        (np.max(cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0)) == 0) and \
                        (np.max(cv2.imread(os.path.join(wall_mask_root, img_name), flags=0)) == 0):
                    # Nothing detected and both ground-truth masks are empty.
                    with open(dice_path, readstyle, newline='') as file:
                        csv_file = csv.writer(file)
                        datas = [img_name, 1., 1., 1.]
                        csv_file.writerow(datas)
                    continue
                elif r.pred_classes.shape == torch.Size([0]) and \
                        (np.max(cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0)) == 0) and \
                        (np.max(cv2.imread(os.path.join(wall_mask_root, img_name), flags=0)) != 0):
                    # Nothing detected, but the wall ground truth is non-empty.
                    with open(dice_path, readstyle, newline='') as file:
                        csv_file = csv.writer(file)
                        datas = [img_name, 1., 0, 0.5]
                        csv_file.writerow(datas)
                    continue
                elif r.pred_classes.shape == torch.Size([0]) and \
                        (np.max(cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0)) != 0) and \
                        (np.max(cv2.imread(os.path.join(wall_mask_root, img_name), flags=0)) == 0):
                    # Nothing detected, but the tumor ground truth is non-empty.
                    with open(dice_path, readstyle, newline='') as file:
                        csv_file = csv.writer(file)
                        datas = [img_name, 0, 1., 0.5]
                        csv_file.writerow(datas)
                    continue

                if r.pred_classes.min().cpu().numpy() == 0 and (  # both classes were predicted
                        r.pred_classes.max().cpu().numpy() == self.num_classes - 1):
                    for i in range(self.num_classes):
                        pred_sum = np.zeros((r.pred_masks.shape[1], r.pred_masks.shape[2]), dtype=np.int32)
                        pred_classes = r.pred_classes
                        index = (pred_classes == i).nonzero()
                        index = index.reshape(len(index))
                        scores, idx = r.scores[index].sort(descending=True)
                        pred_masks = r.pred_masks[index][idx]  # (7,310,420)
                        if i == 0:
                            for j in range(pred_masks.shape[0]):
                                pred_sum += pred_masks[j].int().cpu().numpy()
                            pred_sum[pred_sum >= 2] = 1
                            assert pred_sum.max() <= 1
                            pred = (pred_sum * 255).astype(np.uint8)
                            cv2.imwrite(os.path.join(pred_tumor_path, img_name),pred)
                            pred_tumor_masks.append(torch.tensor(pred_sum,dtype=torch.float32,device='cuda'))
                            mask_path = os.path.join(tumor_mask_root, img_name)
                            mask_arr = cv2.imread(mask_path, flags=0)  # (320,410)
                            gt_tumor_masks.append(torch.from_numpy(mask_arr/255.).type(torch.float32).cuda())
                        else:
                            for j in range(pred_masks.shape[0]):
                                pred_sum += pred_masks[j].int().cpu().numpy()
                            pred_sum[pred_sum >= 2] = 1
                            assert pred_sum.max() <= 1
                            pred = (pred_sum * 255).astype(np.uint8)
                            cv2.imwrite(os.path.join(pred_wall_path, img_name),pred)
                            pred_wall_masks.append(torch.tensor(pred_sum,dtype=torch.float32,device='cuda'))
                            mask_path = os.path.join(wall_mask_root, img_name)
                            mask_arr = cv2.imread(mask_path, flags=0)
                            gt_wall_masks.append(torch.from_numpy(mask_arr/255.).type(torch.float32).cuda())
                else:
                    scores, idx = r.scores.sort(descending=True)
                    pred_masks = r.pred_masks[idx]  # (7,310,4)
                    pred_sum = np.zeros((r.pred_masks.shape[1], r.pred_masks.shape[2]), dtype=np.int32)
                    if r.pred_classes.max().item() == 0:
                        for i in range(pred_masks.shape[0]):
                            pred_sum += pred_masks[i].int().cpu().numpy()
                        pred_sum[pred_sum >= 2] = 1
                        assert pred_sum.max() <= 1
                        pred = (pred_sum * 255).astype(np.uint8)
                        cv2.imwrite(os.path.join(pred_tumor_path, img_name),pred)
                        pred_tumor_masks.append(torch.tensor(pred_sum,dtype=torch.float32,device='cuda'))
                        mask_path = os.path.join(tumor_mask_root, img_name)
                        mask_arr = cv2.imread(mask_path, flags=0)
                        gt_tumor_masks.append(torch.from_numpy(mask_arr/255.).type(torch.float32).cuda())
                    else:
                        for i in range(pred_masks.shape[0]):
                            pred_sum += pred_masks[i].int().cpu().numpy()
                        pred_sum[pred_sum >= 2] = 1
                        assert pred_sum.max() <= 1
                        pred = (pred_sum * 255).astype(np.uint8)
                        cv2.imwrite(os.path.join(pred_wall_path, img_name), pred)
                        pred_wall_masks.append(torch.tensor(pred_sum,dtype=torch.float32,device='cuda'))
                        mask_path = os.path.join(wall_mask_root, img_name)
                        mask_arr = cv2.imread(mask_path, flags=0)
                        gt_wall_masks.append(torch.from_numpy(mask_arr/255.).type(torch.float32).cuda())

                if pred_tumor_masks:
                    tumor_dice_mean = dice_coefficient(torch.cat(pred_tumor_masks, dim=1).view(len(pred_tumor_masks), -1),
                                                       torch.cat(gt_tumor_masks, dim=1).view(len(pred_tumor_masks), -1))
                elif cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0).max() == 0:
                    tumor_dice_mean = torch.tensor(1.).to(self.device)
                else:
                    tumor_dice_mean = torch.tensor(0.).to(self.device)

                if pred_wall_masks:
                    wall_dice_mean = dice_coefficient(torch.cat(pred_wall_masks, dim=1).view(len(pred_wall_masks), -1),
                                                      torch.cat(gt_wall_masks, dim=1).view(len(pred_wall_masks), -1))
                elif cv2.imread(os.path.join(wall_mask_root, img_name), flags=0).max() == 0:
                    wall_dice_mean = torch.tensor(1.).to(self.device)
                else:
                    wall_dice_mean = torch.tensor(0.).to(self.device)
                dice_mean = (tumor_dice_mean + wall_dice_mean) / 2

                with open(dice_path, readstyle, newline='') as file:
                    csv_file = csv.writer(file)
                    datas = [img_name, tumor_dice_mean.item(), wall_dice_mean.item(), dice_mean.item()]
                    csv_file.writerow(datas)

        return processed_results
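
A hedged sketch of a Dice coefficient matching how `dice_coefficient` is called above on flattened binary masks; the actual implementation in the source may differ.

import torch

def dice_coefficient(pred, target, eps=1e-6):
    # pred, target: (N, H*W) float tensors with values in {0, 1}
    inter = (pred * target).sum(dim=1)
    union = pred.sum(dim=1) + target.sum(dim=1)
    return ((2.0 * inter + eps) / (union + eps)).mean()
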
Example #12
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        if self.training:
            # prepare images & gt.
            images = self.preprocess_image(batched_inputs)
            if isinstance(batched_inputs[0], tuple):
                gt_instances = list()
                for paired_inputs in batched_inputs:
                    paired_instances = [
                        x["instances"].to(self.device) for x in paired_inputs
                    ]
                    gt_instances.append(paired_instances)
            else:
                gt_instances = [
                    x["instances"].to(self.device) for x in batched_inputs
                ]

            # detection first.
            output = self.detr(copy.deepcopy(images))
            output, pre_embed = output
            if self.track_on:
                # generate targets for tracking.
                targets = self.prepare_targets_for_tracking(
                    gt_instances)  # cur_targets / pre_targets.
                # compute loss for detection (pre frame) & generate indices.
                loss_det, indices_det = self.criterion(output,
                                                       targets[1],
                                                       track_on=False)

                # track.
                if self.track_aug:
                    track_embed = list()
                    track_indices = list()
                    for i, indices in enumerate(indices_det):
                        embedding = pre_embed[i]
                        indices_pos = indices[0]
                        size_embedding = len(embedding)
                        size_pos = len(indices_pos)
                        indices_cand = torch.ones(size_embedding)
                        indices_cand[indices_pos] = 0
                        indices_neg = indices_cand.nonzero().squeeze(1)
                        assert len(indices_pos) <= len(indices_neg)
                        indices_select = torch.randperm(
                            len(indices_neg))[:size_pos]
                        indices_neg = indices_neg[indices_select]
                        # make a copy/aug.
                        embedding[indices_neg, :] = embedding[indices_pos, :]
                        track_embed.append(embedding.unsqueeze(0))
                        track_indices.append(
                            tuple(list(indices) +
                                  [indices_neg]))  # pos / gt / neg.
                    track_embed = torch.cat(track_embed, 0).permute(1, 0, 2)
                else:
                    track_embed = pre_embed.permute(1, 0, 2)
                    track_indices = indices_det

                output = self.detr(images, pre_embed=track_embed)
                output, _ = output
                # compute loss for tracking (cur frame).
                loss_track, _ = self.criterion(output,
                                               targets,
                                               indices_track=track_indices,
                                               track_on=True)
                # aggregate losses.
                losses_dict = dict()
                weight_dict = self.criterion.weight_dict
                loss_candidates = [loss_det, loss_track]
                if self.freeze_det:
                    loss_candidates.pop(0)
                for loss_dict in loss_candidates:
                    for k in loss_dict.keys():
                        if k in weight_dict:
                            if k not in losses_dict.keys():
                                losses_dict[k] = loss_dict[k] * weight_dict[k]
                            else:
                                losses_dict[k] = (
                                    losses_dict[k] +
                                    loss_dict[k] * weight_dict[k]) / 2
                return losses_dict
            else:
                raise NotImplementedError
                targets = self.prepare_targets(gt_instances)
                loss_dict = self.criterion(output, targets)
                weight_dict = self.criterion.weight_dict
                for k in loss_dict.keys():
                    if k in weight_dict:
                        loss_dict[k] *= weight_dict[k]
                return loss_dict
        else:
            assert len(batched_inputs) == 1, (
                "Only support ONE image at a time during inference, "
                "while there are {} images now.".format(len(batched_inputs)))
            # prepare images.
            images = self.preprocess_image(batched_inputs)
            if isinstance(batched_inputs[0], tuple):
                cur_input = batched_inputs[0][0]
                pre_embed = cur_input.get("pre_embed", None)
            else:
                raise NotImplementedError

            # inference.
            output = self.detr(images, pre_embed=pre_embed)
            output, pre_embed = output
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            if self.track_on:
                box_track = output["pred_tracks"]
                results = self.inference(box_cls,
                                         box_pred,
                                         images.image_sizes,
                                         box_track=box_track)
            else:
                raise NotImplementedError
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs[0], images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results, pre_embed.permute(1, 0, 2)
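
A heavily hedged sketch of the frame-by-frame tracking loop this inference branch implies: the embedding returned for one frame is fed back through the "pre_embed" key of the next frame's input dict. The surrounding data structures (`frame_dicts`, the one-element tuple) are assumptions, not from the source.

pre_embed = None
for cur_dict in frame_dicts:                  # one DatasetMapper-style dict per frame
    if pre_embed is not None:
        cur_dict["pre_embed"] = pre_embed
    # inference expects a single, tuple-wrapped input per call
    processed_results, pre_embed = model([(cur_dict,)])
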
Example #13
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one video.
                For now, each item in the list is a list of dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        """
        assert len(batched_inputs) == 1
        dataset_dict = batched_inputs[0]
        images, images_xywh = self.preprocess_image(dataset_dict)

        # Feature Extraction.
        src = self.backbone(images.tensor)
        features = list()
        for f in self.in_features:
            feature = src[f]
            features.append(feature)

        # Prepare Proposals.
        proposal_boxes = images_xywh[:,
                                     None, :].repeat(1, self.num_proposals, 1)

        # Prediction.
        outputs_class, outputs_coord = self.head(
            features, proposal_boxes, self.init_proposal_features.weight)
        output = {
            'pred_logits': outputs_class[-1],
            'pred_boxes': outputs_coord[-1]
        }

        if self.training:
            gt_instances = [
                x["instances"].to(self.device) for x in dataset_dict
            ]
            targets = self.prepare_targets(gt_instances)
            if self.deep_supervision:
                output['aux_outputs'] = [{
                    'pred_logits': a,
                    'pred_boxes': b
                } for a, b in zip(outputs_class[:-1], outputs_coord[:-1])]

            loss_dict = self.criterion(output, targets)
            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]
            return loss_dict

        else:
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            results = self.inference(box_cls, box_pred, images.image_sizes)

            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, dataset_dict, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})

            return processed_results
Example #14
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        """
        # images: ImageList; images_whwh: Tensor of per-image [w, h, w, h], used to scale boxes.
        images, images_whwh = self.preprocess_image(batched_inputs)
        if isinstance(images, (list, torch.Tensor)):
            images = nested_tensor_from_tensor_list(images)

        # Feature Extraction.
        # The backbone returns a dict of feature maps, e.g. {"res2": fmap, ..., "res5": fmap}.
        src = self.backbone(images.tensor)
        features = list()
        # self.in_features: ["res2", "res3", "res4", "res5"]
        for f in self.in_features:
            feature = src[f]
            features.append(feature)

        # Cls & Reg Prediction.
        # outputs_class: Tensor of shape (N, num_class, H/4, W/4)
        # outputs_coord: Tensor of shape (N, 4, H/4, W/4); coordinates are already scaled
        # to the actual image size.
        outputs_class, outputs_coord = self.head(features)
        
        output = {'pred_logits': outputs_class, 'pred_boxes': outputs_coord}
        """
        output:{
            "pred_logits":with shape (N, num_class, H/4, W/4)
            "pred_boxes": with shape (N, 4, H/4, W/4) boxes 已经中心原图化
        }
        """
        if self.training:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
            targets = self.prepare_targets(gt_instances)
            """ targets = [dict]
            每个dict 是一个图片的
            dict = {
                label:(tensor) [num] 类别
                boxes:(tensor) [num, 4] (, cx,cy,w,h) 归一化后的,boxes format
                boxes_xyxy: (tensor), [num, 4] # 原来的boxes
                image_size_xyxy:(tensor) [4] # [w,h,w,h]
                image_size_xyxy_tgt:(tensor) [num,4]  item 同上
                area: (tensor), [num] # 计算每个boxes 的面积
                all is *.to(self.device)
            }
            """
            # loss_dict: {"loss_ce": ..., "loss_giou": ..., "loss_bbox": ...};
            # loss_bbox is an L1 loss on (x1, y1, x2, y2), already normalized.
            loss_dict = self.criterion(output, targets)
            # Weight each loss term.
            weight_dict = self.criterion.weight_dict
            for k in loss_dict.keys():
                if k in weight_dict:
                    loss_dict[k] *= weight_dict[k]
            
            return loss_dict

        else:
            box_cls = output["pred_logits"]
            box_pred = output["pred_boxes"]
            # results: list[Instances] of length batch_size.
            # Without NMS, each attribute has shape [topk]:
            #   .pred_boxes (Boxes): Boxes.tensor of shape [topk, 4]
            #   .scores (Tensor): shape [topk]
            #   .pred_classes (Tensor): predicted class ids
            # images.image_sizes is the size after the data transforms.
            results = self.inference(box_cls, box_pred, images.image_sizes)

            processed_results = []
            for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes):
                # "height"/"width" are the original image size; image_size is the size
                # after the data transforms.
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                # Rescale the boxes to the original image size.
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            
            return processed_results
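
A small consumption sketch (assumed usage) of the postprocessed results returned above, with field names as documented in the comments.

for out in processed_results:
    inst = out["instances"].to("cpu")
    boxes = inst.pred_boxes.tensor    # (num_det, 4), absolute coords in the original image
    scores = inst.scores              # (num_det,)
    classes = inst.pred_classes       # (num_det,) class ids
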
Example #15
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "instances": Instances
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                each dict is the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`.
                  See the return value of
                  :func:`combine_semantic_and_instance_outputs` for its format.
        """
        image_path = [x['file_name'] for x in batched_inputs]
        if self.training:
            flips = [x['flip'] for x in batched_inputs]
        else:
            flips = None
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        features = self.backbone(images.tensor)

        if "proposals" in batched_inputs[0]:
            proposals = [
                x["proposals"].to(self.device) for x in batched_inputs
            ]
            proposal_losses = {}

        if "sem_seg" in batched_inputs[0]:
            gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
            gt_sem_seg = ImageList.from_tensors(
                gt_sem_seg, self.backbone.size_divisibility,
                self.sem_seg_head.ignore_value).tensor
        else:
            gt_sem_seg = None
        sem_seg_results, sem_seg_losses = self.sem_seg_head(
            features, gt_sem_seg)

        if "integral_sem_seg" in batched_inputs[0] and self.training:
            gt_integral_sem_seg = [
                x["integral_sem_seg"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_integral_sem_seg = None

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
            if hasattr(self.roi_heads.box_predictor, 'add_pseudo_label'):
                gt_instances = self.roi_heads.box_predictor.add_pseudo_label(
                    gt_instances, image_path, flips)
        else:
            gt_instances = None

        if self.proposal_generator:
            proposals, proposal_losses = self.proposal_generator(
                images, features, gt_instances, gt_integral_sem_seg)
        detector_results, detector_losses = self.roi_heads(
            images,
            features,
            proposals,
            gt_instances,
            gt_integral_sem_seg,
            image_path=image_path,
            flips=flips)

        if self.training:
            losses = {}
            losses.update(sem_seg_losses)
            losses.update({
                k: v * self.instance_loss_weight
                for k, v in detector_losses.items()
            })
            losses.update(proposal_losses)
            return losses

        processed_results = []
        for sem_seg_result, detector_result, input_per_image, image_size in zip(
                sem_seg_results, detector_results, batched_inputs,
                images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height,
                                            width)
            detector_r = detector_postprocess(detector_result, height, width)

            processed_results.append({
                "sem_seg": sem_seg_r,
                "instances": detector_r
            })

            if self.combine_on:
                panoptic_r = combine_semantic_and_instance_outputs(
                    detector_r,
                    sem_seg_r.argmax(dim=0),
                    self.combine_overlap_threshold,
                    self.combine_stuff_area_limit,
                    self.combine_instances_confidence_threshold,
                )
                processed_results[-1]["panoptic_seg"] = panoptic_r
        return processed_results
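
A hedged sketch of reading the combined output when `combine_on` is enabled; the `(panoptic_seg, segments_info)` structure follows detectron2's `combine_semantic_and_instance_outputs`, which this code appears to use.

result = processed_results[0]             # one dict per image, as in the docstring above
sem_seg = result["sem_seg"]               # (num_classes, H, W) semantic scores
instances = result["instances"]           # detections rescaled to the output resolution
if "panoptic_seg" in result:
    panoptic_seg, segments_info = result["panoptic_seg"]
    # panoptic_seg: (H, W) id map; segments_info: list of per-segment metadata dicts
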