Example No. 1
    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns a list[BoxList] that contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        images = to_image_list(images)
        features = self.backbone(images.tensors)
        proposals, proposal_losses = self.rpn(images, features, targets)
        if self.roi_heads:
            x, result, detector_losses = self.roi_heads(
                features, proposals, targets)
        else:
            # RPN-only models don't have roi_heads
            x = features
            result = proposals
            detector_losses = {}

        if self.training:
            losses = {}
            losses.update(detector_losses)
            losses.update(proposal_losses)
            return losses

        return result
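For context, here is a minimal sketch of how this forward() contract is usually consumed; `model`, `images`, and `targets` stand for objects built elsewhere in the project, so the names are illustrative only:

# Hypothetical driver, assuming `model`, `images` and `targets` exist.
model.train()
loss_dict = model(images, targets)     # dict[Tensor] of RPN + detector losses
total_loss = sum(loss for loss in loss_dict.values())

model.eval()
detections = model(images)             # list[BoxList], one per input image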
Example No. 2
    def compute_prediction(self, original_image):
        """
        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        image = self.transforms(original_image)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image,
                                   self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        # compute predictions

        with jt.no_grad():
            predictions = self.model(image_list)

        # only a single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size
        height, width = original_image.shape[:-1]
        input_w, input_h = prediction.size

        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")

            if masks.ndim == 3:
                # resize masks

                stride_mask = float(prediction.get_field('stride').item())
                h = math.ceil(masks.shape[1] * stride_mask * height / input_h)
                w = math.ceil(masks.shape[2] * stride_mask * width / input_w)
                mask_th = prediction.get_field('mask_th')
                masks = nn.interpolate(X=masks.unsqueeze(1).float(),
                                       size=(h, w),
                                       mode="bilinear",
                                       align_corners=False) > mask_th
                masks = masks[:, :, :height, :width]

                prediction.add_field("mask", masks)
            else:
                # only a single image is passed at a time
                masks = self.masker([masks], [prediction])[0]

                prediction.add_field("mask", masks)

        return prediction
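A hedged usage sketch for compute_prediction; `demo` here is a hypothetical wrapper object that defines the method (a COCODemo-style class in maskrcnn-benchmark forks), and OpenCV is assumed for loading:

import cv2

image = cv2.imread("input.jpg")               # BGR np.ndarray, as OpenCV returns
prediction = demo.compute_prediction(image)   # `demo` is hypothetical
print(prediction.fields())                    # e.g. ['labels', 'scores', 'mask']
boxes = prediction.bbox                       # boxes in original image coordinates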
Example No. 3
    def __call__(self, batch):
        # batch is a list of (image, target, img_id) tuples;
        # zip(*batch) regroups it into per-field lists
        transposed_batch = list(zip(*batch))
        images = to_image_list(transposed_batch[0], self.size_divisible)
        targets = transposed_batch[1]
        img_ids = transposed_batch[2]
        if isinstance(images, tuple):
            return images[0], images[1], img_ids

        return images, targets, img_ids
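A minimal sketch of how a collator like this is wired up; the BatchCollator class name, its size_divisible argument, and the (image, target, img_id) sample layout follow the maskrcnn-benchmark convention and are assumptions here:

# Hypothetical wiring: each dataset sample is an (image, target, img_id) tuple.
collator = BatchCollator(size_divisible=32)
batch = [(img0, tgt0, 0), (img1, tgt1, 1)]     # illustrative samples
images, targets, img_ids = collator(batch)     # images is an ImageList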
Example No. 4
    def execute(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns a list[BoxList] that contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        if self.is_training() and targets is None:
            raise ValueError("In training mode, targets should be passed")
        images = to_image_list(images)
        features = self.backbone(images.tensors)

        proposals, proposal_losses = self.rpn(images, features, targets)
        if self.roi_heads:
            x, result, detector_losses = self.roi_heads(features, proposals, targets)
        else:
            # RPN-only models don't have roi_heads
            x = features
            result = proposals
            detector_losses = {}

        if self.is_training():
            losses = {}
            losses.update(detector_losses)
            losses.update(proposal_losses)
            return losses

        return result
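Note that this is the Jittor counterpart of Example No. 1: Jittor modules implement execute() instead of forward(), and the training flag is queried through is_training() rather than a .training attribute; the control flow is otherwise identical.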
Example No. 5
def im_detect_bbox(model, images, target_scale, target_max_size):
    """
    Performs bbox detection on the original image.
    """
    transform = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                    std=cfg.INPUT.PIXEL_STD,
                    to_bgr255=cfg.INPUT.TO_BGR255)
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    return model(images)
Example No. 6
def im_detect_bbox_hflip(model, images, target_scale, target_max_size):
    """
    Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.
    """
    transform = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),
        TT.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                    std=cfg.INPUT.PIXEL_STD,
                    to_bgr255=cfg.INPUT.TO_BGR255)
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    boxlists = model(images)

    # Invert the detections computed on the flipped image
    boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists]
    return boxlists_inv
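These two helpers are typically combined for test-time augmentation. Below is a sketch of the merge step, assuming cat_boxlist is the project's BoxList concatenation utility (as in maskrcnn-benchmark's boxlist_ops):

# Hypothetical TTA driver: pool straight and flipped detections per image.
boxlists = im_detect_bbox(model, images, target_scale, target_max_size)
boxlists_hf = im_detect_bbox_hflip(model, images, target_scale, target_max_size)
merged = [cat_boxlist([b, bf]) for b, bf in zip(boxlists, boxlists_hf)]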
Example No. 7
def run_model(config_file, img_f=None):
    original_image = load(img_f)
    from detectron.config import cfg
    from detectron.modeling.detector import build_detection_model
    from detectron.utils.checkpoint import DetectronCheckpointer
    from detectron.structures.image_list import to_image_list
    from detectron.modeling.roi_heads.mask_head.inference import Masker

    from jittor import transform as T
    from jittor import nn
    import jittor as jt

    jt.flags.use_cuda = 1
    confidence_threshold = 0.0

    cfg.merge_from_file(config_file)
    model = build_detection_model(cfg)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    model.eval()

    class Resize(object):
        def __init__(self, min_size, max_size):
            self.min_size = min_size
            self.max_size = max_size

        # modified from torchvision to add support for max size
        def get_size(self, image_size):
            w, h = image_size
            size = self.min_size
            max_size = self.max_size
            if max_size is not None:
                min_original_size = float(min((w, h)))
                max_original_size = float(max((w, h)))
                if max_original_size / min_original_size * size > max_size:
                    size = int(
                        round(max_size * min_original_size /
                              max_original_size))

            if (w <= h and w == size) or (h <= w and h == size):
                return (h, w)

            if w < h:
                ow = size
                oh = int(size * h / w)
            else:
                oh = size
                ow = int(size * w / h)

            return (oh, ow)

        def __call__(self, image):
            size = self.get_size(image.size)
            image = T.resize(image, size)
            return image
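    # Worked example for get_size (illustrative numbers): with
    # min_size=800 and max_size=1333, a 1024x768 image gives
    # 1024/768 * 800 ~= 1067 <= 1333, so the shorter side maps to 800
    # and get_size((1024, 768)) returns (oh, ow) = (800, 1066).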

    def build_transform():
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.ImageNormalize(mean=cfg.INPUT.PIXEL_MEAN,
                                               std=cfg.INPUT.PIXEL_STD)
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        transform = T.Compose([
            T.ToPILImage(),
            Resize(min_size, max_size),
            T.ToTensor(),
            to_bgr_transform,
            normalize_transform,
        ])
        return transform

    transforms = build_transform()
    image = transforms(original_image)
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    predictions = model(image_list)

    predictions = predictions[0]
    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")

    keep = jt.nonzero(scores > confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    predictions = predictions[idx]

    result_diff(predictions)