Example #1
    def get_hidden_outputs(self, batched_inputs):
        # complete image
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.inpaint_net.size_divisibility)
        # triplet input maps:
        # erased regions
        masks = [x["mask"].to(self.device) for x in batched_inputs]
        masks = ImageList.from_tensors(masks,
                                       self.inpaint_net.size_divisibility)
        # mask the input image with masks
        erased_ims = images.tensor * (1. - masks.tensor)
        # ones map
        ones_ims = [
            torch.ones_like(x["mask"].to(self.device)) for x in batched_inputs
        ]
        ones_ims = ImageList.from_tensors(ones_ims,
                                          self.inpaint_net.size_divisibility)
        # the conv layers use zero padding; this ones map marks the valid image region (the image boundary)

        # generation process
        input_tensor = torch.cat([erased_ims, ones_ims.tensor, masks.tensor],
                                 dim=1)

        all_hidden_outputs = self.inpaint_net.get_hidden_outputs(
            input_tensor, masks.tensor)

        return all_hidden_outputs
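A minimal, self-contained sketch of the triplet-input construction used above, with toy tensors standing in for self.inpaint_net and the ImageList batching (the 3+1+1 channels are concatenated along dim=1):

import torch

image = torch.rand(1, 3, 64, 64)                 # normalized RGB batch
mask = (torch.rand(1, 1, 64, 64) > 0.5).float()  # 1 inside the erased region
erased = image * (1. - mask)                     # zero out the erased pixels
ones = torch.ones_like(mask)                     # marks the valid image extent
triplet = torch.cat([erased, ones, mask], dim=1)
print(triplet.shape)                             # torch.Size([1, 5, 64, 64])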
Example #2
    def _forward(self, batched_inputs):
        image_path = [x['file_name'] for x in batched_inputs]
        if self.training:
            flips = [x['flip'] for x in batched_inputs]
        else:
            flips = None
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        features = self.backbone(images.tensor)
        proposals = None

        if "proposals" in batched_inputs[0]:
            proposals = [
                x["proposals"].to(self.device) for x in batched_inputs
            ]
            proposal_losses = {}

        if "sem_seg" in batched_inputs[0]:
            gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
            gt_sem_seg = ImageList.from_tensors(
                gt_sem_seg, self.backbone.size_divisibility,
                self.sem_seg_head.ignore_value).tensor
        else:
            gt_sem_seg = None
        sem_seg_results, sem_seg_losses = self.sem_seg_head(
            features, gt_sem_seg)

        if "integral_sem_seg" in batched_inputs[0] and self.training:
            gt_integral_sem_seg = [
                x["integral_sem_seg"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_integral_sem_seg = None

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        if self.proposal_generator:
            proposals, proposal_losses = self.proposal_generator(
                images, features, gt_instances, gt_integral_sem_seg)
        else:
            proposal_losses = {}

        if "instances" in batched_inputs[0]:
            if hasattr(self.roi_heads.box_predictor, 'add_pseudo_label'):
                gt_instances = self.roi_heads.box_predictor.add_pseudo_label(
                    gt_instances, image_path, flips)
        losses = {}
        if self.training:
            losses.update(sem_seg_losses)
            losses.update(proposal_losses)

        return images, features, proposals, gt_instances, gt_integral_sem_seg, sem_seg_results, losses
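A hedged sketch of how the ground-truth maps above are batched: ImageList.from_tensors pads the last two dimensions of each tensor, and passing the head's ignore value (255 is a common detectron2 default) as the pad value keeps the padded pixels out of the loss:

import torch
from detectron2.structures import ImageList

gt_sem_seg = [torch.randint(0, 20, (480, 640)), torch.randint(0, 20, (500, 600))]
gt_batch = ImageList.from_tensors(gt_sem_seg, size_divisibility=32, pad_value=255).tensor
print(gt_batch.shape)   # torch.Size([2, 512, 640]); 500 rounds up to 512, 640 is already a multiple of 32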
Example #3
    def forward(self, batched_inputs):
        if not self.training and not self.visualize_path:
            return self.single_test(batched_inputs)

        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)

        features = self.backbone(images.tensor)
        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN,
                "'targets' in the model inputs is now renamed to 'instances'!",
                n=10)
            gt_instances = [
                x["targets"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        if "sem_seg" in batched_inputs[0]:
            gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
            gt_sem_seg = ImageList.from_tensors(
                gt_sem_seg, self.backbone.size_divisibility,
                self.refinement_head.ignore_value).tensor
        else:
            gt_sem_seg = None

        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances)
        edge_map, head_losses, proposals = self.refinement_head(
            features, proposals,
            (gt_sem_seg, [gt_instances, images.image_sizes]))

        # Unlike RPN-only models, the proposals are still needed here during
        # training, since they feed the refinement head above.
        if self.training:
            proposal_losses.update(head_losses)
            return proposal_losses

        processed_results = []

        for per_edge_map, results_per_image, input_per_image, image_size in zip(
                edge_map, proposals, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            edge_map_r = edge_map_postprocess(per_edge_map, image_size)
            instance_r = detector_postprocess(results_per_image, height, width)
            processed_results.append({
                "instances": instance_r,
                "edge_map": edge_map_r
            })
        return processed_results
Example #4
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                   * "image": Tensor, image in (C, H, W) format.
                   * "sem_seg": semantic segmentation ground truth
                   * Other information that's included in the original dicts, such as:
                     "height", "width" (int): the output resolution of the model (may be different
                     from input resolution), used in inference.


        Returns:
            list[dict]:
              Each dict is the output for one input image.
              The dict contains one key "sem_seg" whose value is a
              Tensor representing the per-pixel segmentation predicted by the head.
              The prediction has shape KxHxW, giving the logits of
              each class for each pixel.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(
            images,
            self.backbone.size_divisibility,
            padding_constraints=self.backbone.padding_constraints,
        )

        features = self.backbone(images.tensor)

        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets,
                self.backbone.size_divisibility,
                self.sem_seg_head.ignore_value,
                self.backbone.padding_constraints,
            ).tensor
        else:
            targets = None
        results, losses = self.sem_seg_head(features, targets)

        if self.training:
            return losses

        processed_results = []
        for result, input_per_image, image_size in zip(results, batched_inputs,
                                                       images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = sem_seg_postprocess(result, image_size, height, width)
            processed_results.append({"sem_seg": r})
        return processed_results
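A small sketch of the (x - pixel_mean) / pixel_std normalization used above: the mean/std are (3, 1, 1) tensors so they broadcast over H and W. The BGR statistics shown here are only illustrative; the real values come from cfg.MODEL.PIXEL_MEAN / cfg.MODEL.PIXEL_STD.

import torch

pixel_mean = torch.tensor([103.530, 116.280, 123.675]).view(3, 1, 1)  # illustrative values
pixel_std = torch.tensor([57.375, 57.120, 58.395]).view(3, 1, 1)      # illustrative values
img = torch.randint(0, 256, (3, 480, 640)).float()
normalized = (img - pixel_mean) / pixel_std   # broadcasts over H and W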
Example #5
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.

        For now, each item in the list is a dict that contains:
            image: Tensor, image in (C, H, W) format.
            sem_seg: semantic segmentation ground truth
            Other information that's included in the original dicts, such as:
                "height", "width" (int): the output resolution of the model, used in inference.
                    See :meth:`postprocess` for details.

        Returns:
            list[dict]: Each dict is the output for one input image.
                The dict contains one key "sem_seg" whose value is a
                Tensor of the output resolution that represents the
                per-pixel segmentation prediction.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)

        features = self.backbone(images.tensor)
           
        if "contours" in batched_inputs[0]:
            contours = ImageList.from_tensors(
                [x["contours"].gt_contours.to(self.device).tensor for x in batched_inputs], self.backbone.size_divisibility
            ).tensor
            segmasks = ImageList.from_tensors(
                [x["contours"].gt_segmasks.to(self.device).tensor for x in batched_inputs], self.backbone.size_divisibility
            ).tensor
        else:
            contours = None
            segmasks = None
        
        if "instances" in batched_inputs[0]:
            objmask = [x["instances"].gt_masks.to(self.device).tensor for x in batched_inputs]
            classes = [x["instances"].gt_classes.to(self.device) for x in batched_inputs]
        else:
            objmask = None
            classes = None         
        
        results, losses = self.sem_seg_head(features, segmasks, contours, objmask, classes)        
        if self.training:
            return losses

        processed_results = []
        for segmap, contour, emb, input_per_image, image_size in zip(
                results[0], results[1], results[2], batched_inputs, images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            
            #TODO: translate semantic segmentations and contour maps into detection bounding boxes
            r = seg_det_postprocess(segmap, contour, emb, image_size, height, width)
            processed_results.append({"instances": r})  #, "segmap": segmap, "contour": contour, "emb": emb
        return processed_results
Example #6
    def forward(self, batched_inputs):
        """
        Args:
            Same as in :class:`GeneralizedRCNN.forward`

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "proposals" whose value is a
                :class:`Instances` with keys "proposal_boxes" and "objectness_logits".
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        features = self.backbone(images.tensor)

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN,
                "'targets' in the model inputs is now renamed to 'instances'!",
                n=10)
            gt_instances = [
                x["targets"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        masks = {
            key: ImageList.from_tensors([x[key] for x in batched_inputs],
                                        self.backbone.size_divisibility)
            for key in self.masks
        }
        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances, **masks)
        # In training, the proposals are not useful at all but we generate them anyway.
        # This makes RPN-only models about 5% slower.
        if self.training:
            return proposal_losses

        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                proposals, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"proposals": r})

        return processed_results
Example #7
 def preprocess_images(self, batched_inputs):
     """
     Normalize, pad and batch the input image pairs.
     """
     pre_images = [x["pre_image"].to(self.device) for x in batched_inputs]
     pre_images = [self.normalizer(x) for x in pre_images]
     pre_images = ImageList.from_tensors(pre_images,
                                         self.backbone.size_divisibility)
     post_images = [x["post_image"].to(self.device) for x in batched_inputs]
     post_images = [self.normalizer(x) for x in post_images]
     post_images = ImageList.from_tensors(post_images,
                                          self.backbone.size_divisibility)
     return pre_images, post_images
Example #8
def setup(file):
    # get cfg
    cfg = get_cfg()
    cfg.merge_from_file(file)
    cfg.SOLVER.IMS_PER_BATCH = 2

    # get data loader iter
    data_loader = build_detection_train_loader(cfg)
    data_loader_iter = iter(data_loader)
    batched_inputs = next(data_loader_iter)

    # build anchors
    backbone = build_backbone(cfg).to(device)
    images = [x["image"].to(device) for x in batched_inputs]
    images = ImageList.from_tensors(images, backbone.size_divisibility)
    features = backbone(images.tensor.float())

    input_shape = backbone.output_shape()
    in_features = cfg.MODEL.RPN.IN_FEATURES
    anchor_generator = build_anchor_generator(
        cfg, [input_shape[f] for f in in_features])
    anchors = anchor_generator([features[f] for f in in_features])
    anchors = Boxes.cat(anchors).to(device)

    # build matcher
    raw_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS,
                          allow_low_quality_matches=True)
    matcher = TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS, 9)

    return cfg, data_loader_iter, anchors, matcher, raw_matcher
Example #9
 def test_rpn_proposals_inf(self):
     N, Hi, Wi, A = 3, 3, 3, 3
     proposals = [torch.rand(N, Hi * Wi * A, 4)]
     pred_logits = [torch.rand(N, Hi * Wi * A)]
     pred_logits[0][1][3:5].fill_(float("inf"))
     images = ImageList.from_tensors([torch.rand(3, 10, 10)] * 3)
     find_top_rpn_proposals(proposals, pred_logits, images, 0.5, 1000, 1000, 0, False)
Example #10
    def preprocess_image(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]):
        """
        Normalize, pad and batch the input images.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)
        return images
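For reference, a self-contained sketch (assuming detectron2 is installed) of what ImageList.from_tensors does in all of these preprocess methods: it pads each (C, H, W) tensor up to a common size, rounded up to a multiple of size_divisibility, and records the original per-image sizes:

import torch
from detectron2.structures import ImageList

imgs = [torch.rand(3, 480, 640), torch.rand(3, 500, 512)]
batch = ImageList.from_tensors(imgs, size_divisibility=32)
print(batch.tensor.shape)    # torch.Size([2, 3, 512, 640])
print(batch.image_sizes)     # [(480, 640), (500, 512)]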
Example #11
def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device):
    """
    See get_caffe2_inputs() below.
    """
    assert all(isinstance(x, dict) for x in batched_inputs)
    assert all(x["image"].dim() == 3 for x in batched_inputs)

    images = [x["image"] for x in batched_inputs]
    images = ImageList.from_tensors(images, size_divisibility)

    im_info = []
    for input_per_image, image_size in zip(batched_inputs, images.image_sizes):
        target_height = input_per_image.get("height", image_size[0])
        target_width = input_per_image.get("width", image_size[1])  # noqa
        # NOTE: The scale inside im_info is kept as convention and for providing
        # post-processing information if further processing is needed. For
        # current Caffe2 model definitions that don't include post-processing inside
        # the model, this number is not used.
        # NOTE: The width and height scales can differ slightly; using a single
        # number can result in numerical differences compared with D2's
        # post-processing.
        scale = target_height / image_size[0]
        im_info.append([image_size[0], image_size[1], scale])
    im_info = torch.Tensor(im_info)

    return images.tensor.to(device), im_info.to(device)
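A worked example of one im_info row built above, using hypothetical sizes: the resized (pre-padding) image is 800x1067 and the requested output resolution ("height"/"width") is 480x640.

image_size = (800, 1067)                               # resized (H, W), before batching pad
target_height, target_width = 480, 640                 # original "height"/"width"
scale = target_height / image_size[0]                  # 0.6 (width scale is ~0.5998, slightly different)
im_info_row = [image_size[0], image_size[1], scale]    # [800, 1067, 0.6]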
Example #12
 def preprocess(self, images):
     processed_images = []
     for image in images:
         height, width = image.shape[:2]
         print("height=", height, "  width=", width)
         image = image.to(device=self.device, non_blocking=True)
         image = image.permute(2, 0, 1).type(torch.float)
         origin_ratio = width / height
         cfg_ratio = self.cfg.INPUT.MAX_SIZE_TEST / self.cfg.INPUT.MIN_SIZE_TEST
         if cfg_ratio > origin_ratio:
             target_height = self.cfg.INPUT.MIN_SIZE_TEST
             target_width = int(round(target_height * origin_ratio))
         else:
             target_width = self.cfg.INPUT.MAX_SIZE_TEST
             target_height = int(round(target_width / origin_ratio))
         target_shape = (target_height, target_width)
         image = F.interpolate(image.unsqueeze(0),
                               target_shape,
                               mode='bilinear',
                               align_corners=False)
         image = (image.squeeze(0) - self.predictor.model.pixel_mean) / \
             self.predictor.model.pixel_std
         processed_images.append(image)
     images = ImageList.from_tensors(
         processed_images, self.predictor.model.backbone.size_divisibility)
     return images
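A worked instance of the aspect-ratio branch above, assuming typical config values MIN_SIZE_TEST=800 and MAX_SIZE_TEST=1333 and a 480x640 input:

height, width = 480, 640
origin_ratio = width / height                              # ~1.333
cfg_ratio = 1333 / 800                                     # ~1.666 > origin_ratio
target_height = 800                                        # MIN_SIZE_TEST branch
target_width = int(round(target_height * origin_ratio))    # 1067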
Example #13
 def preprocess_seg(self, batched_inputs):
     images = [
         x["segment_annotation"].to(self.device) for x in batched_inputs
     ]
     images = ImageList.from_tensors(images,
                                     self.backbone.size_divisibility)
     return images
Example #14
 def preprocess_semseg_image(self, batched_inputs):
     """
     Pad and batch the input semantic segmentation maps.
     """
     images = [x["sem_seg"] for x in batched_inputs]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     return images
Example #15
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [self.normalizer(x.to(self.device)) for x in batched_inputs]
     images = ImageList.from_tensors(images, 2)
     return images
Example #16
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.

        For now, each item in the list is a dict that contains:
            image: Tensor, image in (C, H, W) format.
            sem_seg: semantic segmentation ground truth
            Other information that's included in the original dicts, such as:
                "height", "width" (int): the output resolution of the model, used in inference.
                    See :meth:`postprocess` for details.

        Returns:
            list[dict]: Each dict is the output for one input image.
                The dict contains one key "sem_seg" whose value is a
                Tensor of the output resolution that represents the
                per-pixel segmentation prediction.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)

        features = self.backbone(images.tensor)

        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, self.backbone.size_divisibility,
                self.sem_seg_head.ignore_value).tensor
        else:
            targets = None
        results, losses = self.sem_seg_head(features, targets)

        if self.training:
            return losses

        processed_results = []
        for result, input_per_image, image_size in zip(results, batched_inputs,
                                                       images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(result, image_size, height, width)
            processed_results.append({"sem_seg": r})
        return processed_results
Example #17
 def preprocess_batchedimages(self, batched_inputs):
     """
     Preprocess batch: normalized, resize, pad -> uniform batch
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [self.normalizer(x) for x in images]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     return images
Example #18
 def preprocess_flow(self, batched_inputs):
     """
     Normalize and pad and batch the target flow.
     """
     flows = [x["flow_map"].to(self.device) for x in batched_inputs]
     flows = [x / self.flow_div for x in flows]
     flows = ImageList.from_tensors(flows).tensor
     return flows
Example #19
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [self.normalizer(x) for x in images]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     return images
Example #20
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     images = ImageList.from_tensors(images, self.encoder.size_divisibility)
     return images
Example #21
 def preprocess_image(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     return images
Example #22
    def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]):
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor
        metas = []
        rescale = {"height" in x for x in batched_inputs}
        if len(rescale) != 1:
            raise ValueError("Some inputs have original height/width, but some don't!")
        rescale = list(rescale)[0]
        output_shapes = []
        for input in batched_inputs:
            meta = {}
            c, h, w = input["image"].shape
            meta["img_shape"] = meta["ori_shape"] = (h, w, c)
            if rescale:
                scale_factor = np.sqrt(h * w / (input["height"] * input["width"]))
                ori_shape = (input["height"], input["width"])
                output_shapes.append(ori_shape)
                meta["ori_shape"] = ori_shape + (c,)
            else:
                scale_factor = 1.0
                output_shapes.append((h, w))
            meta["scale_factor"] = scale_factor
            meta["flip"] = False
            padh, padw = images.shape[-2:]
            meta["pad_shape"] = (padh, padw, c)
            metas.append(meta)

        if self.training:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
            if gt_instances[0].has("gt_masks"):
                from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks

                def convert_mask(m, shape):
                    # mmdet mask format
                    if isinstance(m, BitMasks):
                        return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1])
                    else:
                        return mm_PolygonMasks(m.polygons, shape[0], shape[1])

                gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances]
            else:
                gt_masks = None
            losses_and_metrics = self.detector.forward_train(
                images,
                metas,
                [x.gt_boxes.tensor for x in gt_instances],
                [x.gt_classes for x in gt_instances],
                gt_masks=gt_masks,
            )
            return _parse_losses(losses_and_metrics)
        else:
            results = self.detector.simple_test(images, metas, rescale=rescale)
            results = [
                {"instances": _convert_mmdet_result(r, shape)}
                for r, shape in zip(results, output_shapes)
            ]
            return results
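A worked example of the scale_factor computed above, with hypothetical sizes (the model sees a resized 800x1067 image whose original "height"/"width" was 480x640):

import numpy as np

h, w = 800, 1067
orig_h, orig_w = 480, 640
scale_factor = np.sqrt(h * w / (orig_h * orig_w))   # ~1.667, i.e. roughly 800/480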
Example #23
 def _preprocess_image(self, batched_inputs):
     """
     Pad and batch the input images, and collect the per-image labels.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     labels = torch.LongTensor([x["label"]
                                for x in batched_inputs]).to(self.device)
     images = ImageList.from_tensors(images)
     return images, labels
Example #24
    def preprocess_image(self, batched_inputs):
        # all models from detectron2 preprocess the images the same way
        # this could change in the future; fingers crossed
        # reference: https://github.com/facebookresearch/detectron2/tree/master/detectron2/modeling/meta_arch  # last checked: 23.11.20

        images = [x["image"].to(self.model.device) for x in batched_inputs]
        images = [(x - self.model.pixel_mean) / self.model.pixel_std for x in images]
        images = ImageList.from_tensors(images, self.model.backbone.size_divisibility)
        return images
Example #25
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "instances": Instances
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                each dict has the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": See the return value of
                  :func:`combine_semantic_and_instance_outputs` for its format.
        """
        if not self.training:
            return self.inference(batched_inputs)
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        assert "sem_seg" in batched_inputs[0]
        gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
        gt_sem_seg = ImageList.from_tensors(
            gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value
        ).tensor
        sem_seg_results, sem_seg_losses, feat, seg_score = self.sem_seg_head(features, gt_sem_seg)
        #
        del sem_seg_results
        #
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
        detector_results, detector_losses, box_features = self.roi_heads(
            images, features, proposals, gt_instances
        )

        #############################
        # Graph op
        #############################
        instance, sem_seg_results, losses_graph = self.graph_connection(
            box_features, detector_results, features,
            feat, seg_score, gt_sem_seg,
        )

        losses = sem_seg_losses
        losses.update(proposal_losses)
        losses.update(detector_losses)
        losses.update(losses_graph)
        return losses
Example #26
 def preprocess_image(self, batched_inputs: Tuple[Dict[str, Tensor]]):
     '''
         Normalize and batch the input images.
     '''
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [x.float().div(255) if x.dtype != torch.float else x for x in images]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     images = ImageList.from_tensors(images)
     return images
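A small sketch of the dtype handling intended above: uint8 images are converted to float in [0, 1] once, while tensors that are already float are kept as-is rather than dropped from the list:

import torch

imgs = [torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8),  # raw uint8 image
        torch.rand(3, 32, 32)]                                  # already float
imgs = [x.float().div(255) if x.dtype != torch.float else x for x in imgs]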
Example #27
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x.to(self.device) for x in batched_inputs]
     norms = [self.normalizer(x) for x in images]
     size = (norms[0].shape[1], norms[0].shape[2])
     images = ImageList.from_tensors(norms, self.backbone.size_divisibility)
     return images, size
Example #28
 def preprocess_image(self, batched_inputs):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     images = [(x - self.pixel_mean) / self.pixel_std for x in images]
     if self.dynamic:
         images = ImageList.from_tensors(images, self.backbone.size_divisibility)
     else:
         if self.training:
             min_size = self.input.MIN_SIZE_TRAIN
             max_size = self.input.MAX_SIZE_TRAIN
         else:
             min_size = self.input.MIN_SIZE_TEST
             max_size = self.input.MAX_SIZE_TEST
         min_size = min_size[0] if isinstance(min_size, tuple) else min_size
         images = ImageList.from_tensors(images, self.backbone.size_divisibility,
                                         max_height=min_size, max_width=max_size)
     return images
Example #29
 def preprocess_image(self, batched_inputs, norm=True):
     """
     Normalize, pad and batch the input images.
     """
     images = [x["image"].to(self.device) for x in batched_inputs]
     if norm:
         images = [self.normalizer(x) for x in images]
     images = ImageList.from_tensors(images, 512)
     images = images.to(self.device)
     return images
Example #30
    def forward(self, batched_inputs):
        # complete image
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.inpaint_net.size_divisibility)
        # triplet input maps:
        # erased regions
        masks = [x["mask"].to(self.device) for x in batched_inputs]
        masks = ImageList.from_tensors(masks,
                                       self.inpaint_net.size_divisibility)
        # mask the input image with masks
        erased_ims = images.tensor * (1. - masks.tensor)
        # ones map
        ones_ims = [
            torch.ones_like(x["mask"].to(self.device)) for x in batched_inputs
        ]
        ones_ims = ImageList.from_tensors(ones_ims,
                                          self.inpaint_net.size_divisibility)
        # the conv layers use zero padding; this ones map marks the valid image region (the image boundary)

        # generation process
        input_tensor = torch.cat([erased_ims, ones_ims.tensor, masks.tensor],
                                 dim=1)
        coarse_inp, fine_inp, offset_flow = self.inpaint_net(
            input_tensor, masks.tensor)
        # offset_flow is only used for visualization

        if self.training:
            raise NotImplementedError
        else:
            processed_results = []
            inpainted_im = erased_ims * (
                1. - masks.tensor) + fine_inp * masks.tensor
            for result, input_per_image, image_size in zip(
                    inpainted_im, batched_inputs, images.image_sizes):
                height = input_per_image.get("height")
                width = input_per_image.get("width")
                r = sem_seg_postprocess(result, image_size, height, width)
                # reuse the semantic segmentation postprocess; it just resizes/crops to the requested output size
                processed_results.append({"inpainted": r})
            return processed_results
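A toy version of the compositing at the end of the loop above (with a binary mask, re-masking erased_ims is a no-op, since its hole pixels are already zero):

import torch

image = torch.rand(3, 64, 64)
mask = (torch.rand(1, 64, 64) > 0.5).float()      # 1 inside the hole
fine = torch.rand(3, 64, 64)                      # stand-in for the fine inpainting output
erased = image * (1. - mask)
inpainted = erased * (1. - mask) + fine * mask    # original pixels outside, predictions inside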