Example No. 1
    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8)
            for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None
            )
            alpha = 0.3
        else:
            alpha = 0.5

        frame_visualizer.overlay_instances(
            # boxes=None if masks is not None else boxes,  # boxes are a bit distracting
            boxes=boxes,
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output
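
A minimal sketch of a loop that drives a frame-level visualizer like the one above (detectron2 ships this pattern as VideoVisualizer); the model choice and the "input.mp4" path are placeholder assumptions:

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
video_vis = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))

cap = cv2.VideoCapture("input.mp4")  # placeholder path
while cap.isOpened():
    ok, frame_bgr = cap.read()
    if not ok:
        break
    instances = predictor(frame_bgr)["instances"].to("cpu")
    # draw_instance_predictions expects an RGB frame in [0, 255]
    out = video_vis.draw_instance_predictions(frame_bgr[:, :, ::-1], instances)
    cv2.imshow("preview", out.get_image()[:, :, ::-1])
    if cv2.waitKey(1) == 27:  # Esc to quit
        break
cap.release()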
Example No. 2
    def draw_instance_predictions(self, frame, predictions, effect_type):

        frame_visualizer = Visualizer(frame, self.metadata)
        cnt = predictions["current_frame"]
        num_instances = predictions["num_instances"]
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions["boxes"]
        scores = predictions["scores"]
        classes = predictions["classes"]
        keypoints = predictions["keypoints"]
        masks = predictions["masks"]

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None)
            alpha = 0.3
        else:
            alpha = 0.5

        if effect_type == 0:
            frame_visualizer.overlay_instances_scanning(
                boxes=None
                if masks is not None else boxes,  # boxes are a bit distracting
                cnt=cnt,
                masks=masks,
                labels=labels,
                keypoints=keypoints,
                assigned_colors=colors,
                alpha=alpha,
            )
        else:
            frame_visualizer.overlay_instances_stop_motion(
                boxes=None
                if masks is not None else boxes,  # boxes are a bit distracting
                cnt=cnt,
                masks=masks,
                labels=labels,
                keypoints=keypoints,
                assigned_colors=colors,
                alpha=alpha,
            )

        return frame_visualizer.output
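
This variant consumes a plain dict rather than an Instances object. A hedged sketch of packing detectron2 predictions into the fields it reads ("current_frame" is the frame counter that drives the scanning/stop-motion effects):

def pack_predictions(instances, frame_idx):
    # instances: a detectron2 Instances object already moved to the CPU
    return {
        "current_frame": frame_idx,
        "num_instances": len(instances),
        "boxes": instances.pred_boxes.tensor.numpy() if instances.has("pred_boxes") else None,
        "scores": instances.scores if instances.has("scores") else None,
        "classes": instances.pred_classes.numpy() if instances.has("pred_classes") else None,
        "keypoints": instances.pred_keypoints if instances.has("pred_keypoints") else None,
        "masks": instances.pred_masks if instances.has("pred_masks") else None,
    }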
Example No. 3
    def draw_instance_predictions(self, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            predictions (Instances): the output of an instance detection/segmentation
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        boxes = None
        scores = None
        classes = predictions.pred_classes if predictions.has(
            "pred_classes") else None
        labels = _create_text_labels(
            classes, scores,
            ["Hv", "Hp", "CLS", "BL", "PD", "PB", "CC", "LM", "D/P"])
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = np.asarray(predictions.pred_masks)
            masks = [
                GenericMask(x, self.output.height, self.output.width)
                for x in masks
            ]
        else:
            masks = None

        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                "thing_colors"):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
                for c in classes
            ]
            alpha = 0.8
        else:
            colors = None
            alpha = 0

        if self._instance_mode == ColorMode.IMAGE_BW:
            self.output.img = self._create_grayscale_image((
                predictions.pred_masks.any(dim=0) > 0
            ).numpy() if predictions.has("pred_masks") else None)
            alpha = 0.3

        self.overlay_instances(
            masks=masks,
            boxes=boxes,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )
        return self.output
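
The class names here are hard-coded into the _create_text_labels call. An alternative sketch, assuming a placeholder dataset name "my_dataset_train", is to register them once as metadata so self.metadata.get("thing_classes") picks them up:

from detectron2.data import MetadataCatalog

MetadataCatalog.get("my_dataset_train").set(
    thing_classes=["Hv", "Hp", "CLS", "BL", "PD", "PB", "CC", "LM", "D/P"])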
Example No. 4
    def draw_instance_predictions(self, predictions, track_ids):
        """
        Draw instance-level prediction results on an image.

        Args:
            predictions (Instances): the output of an instance detection/segmentation
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        boxes = predictions.pred_boxes if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes if predictions.has(
            "pred_classes") else None
        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = np.asarray(predictions.pred_masks)
            masks = [
                GenericMask(x, self.output.height, self.output.width)
                for x in masks
            ]
        else:
            masks = None

        # set the color according to the track ids
        colors = [cm.tab20(id_) for id_ in track_ids]
        alpha = 0.6

        labels = [
            f'Track {id_} {label}' for label, id_ in zip(labels, track_ids)
        ]

        # increase font size
        if self._default_font_size < 20: self._default_font_size *= 1.3

        if self._instance_mode == ColorMode.IMAGE_BW:
            assert predictions.has(
                "pred_masks"), "ColorMode.IMAGE_BW requires segmentations"
            self.output.img = self._create_grayscale_image(
                (predictions.pred_masks.any(dim=0) > 0).numpy())

        self.overlay_instances(
            masks=masks,
            boxes=boxes,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )
        return self.output
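
The track-color lookup above assumes matplotlib's qualitative colormap; a short sketch of the required import and a caveat:

import matplotlib.cm as cm

track_ids = [0, 1, 7, 15]
colors = [cm.tab20(id_) for id_ in track_ids]  # RGBA tuples in [0, 1]
# tab20 only defines 20 colours, so with many tracks cm.tab20(id_ % 20)
# keeps high ids from all collapsing onto the same colour.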
Example No. 5
def object_detection_obtain_label(predictor, cfg, img):

    outputs = predictor(img)
    predictions = outputs["instances"]
    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None
    labels = _create_text_labels(classes, None, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).get("thing_classes", None))
    label = np.unique(np.array(labels))
    return label
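
A hedged usage sketch for the helper above, assuming its detectron2 imports are in scope; the model choice and "example.jpg" are placeholders:

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

img = cv2.imread("example.jpg")  # BGR, as DefaultPredictor expects by default
print(object_detection_obtain_label(DefaultPredictor(cfg), cfg, img))
# e.g. ['car' 'person'] as a numpy array of unique label strings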
Example No. 6
    def get_labels(self, imgs):
        # imgs is an iterable of CHW numpy arrays; each is converted to HWC RGB below
        labels = []
        predictor = DefaultPredictor(self.cfg)
        for img in imgs:
            img = self.arr_to_rgb(img)
            img = np.moveaxis(img, 0, 2)
            outputs = predictor(img)
            scores = outputs["instances"].scores if outputs["instances"].has("scores") else None
            classes = outputs["instances"].pred_classes if outputs["instances"].has("pred_classes") else None
            labels.append(list(set(_create_text_labels(classes, None, MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).get("thing_classes", None)))))

        return labels
Example No. 7
def draw_instance_predictions(vis,
                              tubelet_ids,
                              tubelet_instances,
                              tubelet_instance_projections,
                              draw_projections=False):
    def get_color(i):
        colors = "bgrcmy"
        return colors[i % len(colors)]

    if not any(tubelet_instances):
        return vis.output

    tubelet_instance_ids = [
        i for i, inst in zip(tubelet_ids, tubelet_instances)
        if inst is not None
    ]
    tubelet_instances = Instances.cat(
        [inst for inst in tubelet_instances if inst is not None])

    labels = visualizer._create_text_labels(
        tubelet_instances.pred_classes, tubelet_instances.scores,
        vis.metadata.get("thing_classes", None))
    for i, tubelet_id in enumerate(tubelet_instance_ids):
        labels[i] = (
            f"{labels[i]} ({tubelet_instances.generation_process[i]}, #{tubelet_id})")

    colors = [get_color(i) for i in tubelet_instance_ids]
    vis.overlay_instances(
        boxes=tubelet_instances.pred_boxes,
        labels=labels,
        assigned_colors=colors,
        alpha=0.5,
    )

    if draw_projections:
        tubelet_instance_projection_ids = [
            i for i, inst in zip(tubelet_ids, tubelet_instance_projections)
            if inst is not None
        ]
        tubelet_instance_projections = Instances.cat([
            inst for inst in tubelet_instance_projections if inst is not None
        ])
        colors = [get_color(i) for i in tubelet_instance_projection_ids]
        labels = [f"Pred. #{i}" for i in tubelet_instance_projection_ids]
        vis.overlay_instances(
            boxes=tubelet_instance_projections.pred_boxes,
            labels=labels,
            assigned_colors=colors,
            alpha=0.1,
        )
    return vis.output
Example No. 8
    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores".

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None

        detected = [
            _DetectedInstance(classes[i], boxes[i], color=None, ttl=8)
            for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # no masks to key on in a detection-only model, so grayscale the whole frame
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image()
            alpha = 0.3
        else:
            alpha = 0.5

        frame_visualizer.overlay_instances(
            boxes=boxes,
            labels=labels,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output
Example No. 9
    def draw_instance_predictions_with_filters(self, filters, predictions):
        print("draw_instance_predictions_with_filters")
        boxes = predictions.pred_boxes if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes if predictions.has(
            "pred_classes") else None
        labels = vis._create_text_labels(
            classes, scores, self.metadata.get("thing_classes", None))
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = np.asarray(predictions.pred_masks)
            masks = [
                vis.GenericMask(x, self.output.height, self.output.width)
                for x in masks
            ]
        else:
            masks = None

        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                "thing_colors"):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
                for c in classes
            ]
            alpha = 0.8
        else:
            colors = None
            alpha = 0.5

        if self._instance_mode == ColorMode.IMAGE_BW:
            self.output.img = self._create_grayscale_image(
                (predictions.pred_masks.any(dim=0) > 0).numpy())
            alpha = 0.3

        _, detected_objects, objects_stats = self.overlay_instances_with_filters(
            filters,
            masks=masks,
            boxes=boxes,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )
        # 2020/08/12: Added detected_objects and objects_stats
        return (self.output, detected_objects, objects_stats)
Example No. 10
    def get_video_labels(self, frame, predictions):
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return ""
        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None
        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))
        # print("labels in get video labels:", labels)
        return labels
Example No. 11
def object_detection_obtain_label(predictor, cfg, img):
    """"
    Arguments:
        predictor: object predictor implementing COCO-Detection faster-rcnn backbone architecture
        cfg: object including parameters for the model like weights and threshold
        img: image numpy array
    Returns:
        label: One numpy array containing only detected object names in the image(string)
    """
    outputs = predictor(img)
    predictions = outputs["instances"]
    scores = predictions.scores if predictions.has("scores") else None
    classes = (predictions.pred_classes.tolist()
               if predictions.has("pred_classes") else None)
    labels = _create_text_labels(
        classes,
        None,
        MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).get("thing_classes", None),
    )
    label = np.unique(np.array(labels))
    return label
Example No. 12
    def draw_instance_bbox(self, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. Following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        num_instances = len(predictions)
        # If there are no newly detected instances, age out and return the
        # instances detected in previous frames.
        if num_instances == 0:
            for inst in self._old_instances:
                inst.ttl -= 1
            self._old_instances = [
                inst for inst in self._old_instances if inst.ttl > 0
            ]
            boxes = [inst.bbox.tolist() for inst in self._old_instances]
            colors = [inst.color for inst in self._old_instances]
            indices = [inst.index for inst in self._old_instances]
            labels = _create_text_labels(
                None, indices, self.metadata.get("thing_classes", None))

            return self._old_instances

        boxes = (predictions.pred_boxes.tensor.numpy()
                 if predictions.has("pred_boxes") else None)
        classes = (predictions.pred_classes.numpy()
                   if predictions.has("pred_classes") else None)
        keypoints = (predictions.pred_keypoints
                     if predictions.has("pred_keypoints") else None)

        # Drop boxes whose area is smaller than the area threshold
        del_idx = []
        for idx, box in enumerate(boxes):
            area = (box[2] - box[0]) * (box[3] - box[1])
            if area < self.area_threshold:
                num_instances -= 1
                del_idx.append(idx)
        del_idx.reverse()
        for _, idx in enumerate(del_idx):
            boxes = np.delete(boxes, idx, 0)
            classes = np.delete(classes, idx, 0)
            keypoints = np.delete(keypoints, idx, 0)

        # If every new instance is smaller than area_threshold, age out and
        # return the instances detected in previous frames.
        if num_instances == 0:
            for inst in self._old_instances:
                inst.ttl -= 1
            self._old_instances = [
                inst for inst in self._old_instances if inst.ttl > 0
            ]
            boxes = [inst.bbox.tolist() for inst in self._old_instances]
            colors = [inst.color for inst in self._old_instances]
            indices = [inst.index for inst in self._old_instances]
            labels = _create_text_labels(
                None, indices, self.metadata.get("thing_classes", None))

            return self._old_instances

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
        else:
            masks = None

        detected = [
            _DetectedInstance(
                classes[i],
                bbox=boxes[i],
                index=None,
                path=[boxes[i]],
                extra=False,
                hide=False,
                hide_time=1,
                overlap=False,
                keypoint=keypoints[i],
                sit=False,
                mask_rle=None,
                color=None,
                ttl=50,
            ) for i in range(num_instances)
        ]
        colors, indices = self.tracking(detected)
        labels = _create_text_labels(classes, indices,
                                     self.metadata.get("thing_classes", None))
        boxes = [inst.bbox.tolist() for inst in self._old_instances]

        if self._instance_mode == ColorMode.IMAGE_BW:
            alpha = 0.3
        else:
            alpha = 0.5

        # Returns True when the first point is higher in the image (smaller y)
        # than all of the other points. For example, with *points = shoulder,
        # hip, ankle it returns True for a normally upright pose.
        def isHigh(*points):
            std = points[0][1]
            for point in points:
                if std > point[1]:
                    return False
            return True

        # Calculate the angle (in degrees) at point2 formed by point1 and point3
        def calDegree(point1, point2, point3):
            a = point1[:2]
            b = point2[:2]
            c = point3[:2]

            ba = a - b
            bc = c - b

            cosine_angle = (np.dot(ba, bc) + 1e-6) / (
                (np.linalg.norm(ba) * np.linalg.norm(bc)) + 1e-6)
            if cosine_angle < -1:
                cosine_angle = -1.0
            if cosine_angle > 1:
                cosine_angle = 1.0

            angle = np.degrees(np.arccos(cosine_angle))

            return angle

        for idx, inst in enumerate(self._old_instances):
            # Check whether each keypoint sits at a plausible position.
            # For example, a shoulder (COCO keypoints 5/6) should be higher
            # than the hips, knees and ankles (keypoints 11-16).
            if not isHigh(
                    inst.keypoint[5],
                    inst.keypoint[11],
                    inst.keypoint[12],
                    inst.keypoint[13],
                    inst.keypoint[14],
                    inst.keypoint[15],
                    inst.keypoint[16],
            ):
                inst.keypoint[5][2] = 0
            if not isHigh(
                    inst.keypoint[6],
                    inst.keypoint[11],
                    inst.keypoint[12],
                    inst.keypoint[13],
                    inst.keypoint[14],
                    inst.keypoint[15],
                    inst.keypoint[16],
            ):
                inst.keypoint[6][2] = 0

            # the hips should be located higher than the ankles
            if not isHigh(inst.keypoint[11], inst.keypoint[15],
                          inst.keypoint[16]):
                inst.keypoint[11][2] = 0
            if not isHigh(inst.keypoint[12], inst.keypoint[15],
                          inst.keypoint[16]):
                inst.keypoint[12][2] = 0

            # Detect whether the instance is sitting. In COCO keypoint order,
            # 11/12 are the hips, 13/14 the knees and 15/16 the ankles.
            left_hip = inst.keypoint[11, :]
            left_knee = inst.keypoint[13, :]
            left_ankle = inst.keypoint[15, :]
            right_hip = inst.keypoint[12, :]
            right_knee = inst.keypoint[14, :]
            right_ankle = inst.keypoint[16, :]

            if (calDegree(left_hip, left_knee,
                          left_ankle) < self.sit_threshold
                    or calDegree(right_hip, right_knee,
                                 right_ankle) < self.sit_threshold):
                inst.sit = True

        return self._old_instances
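
A quick numeric check of the angle helper (with the parenthesisation fixed as above): a right angle at the middle point should come out near 90 degrees.

import numpy as np

a = np.array([0.0, 1.0, 1.0])  # keypoints carry (x, y, score); only x, y are used
b = np.array([0.0, 0.0, 1.0])
c = np.array([1.0, 0.0, 1.0])
ba, bc = a[:2] - b[:2], c[:2] - b[:2]
cosine = (np.dot(ba, bc) + 1e-6) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
print(np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0))))  # ~90.0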
Example No. 13
def main(args):
    global bev_im
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    view = True

    predictor = DefaultPredictor(cfg)
    metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])  # DATASETS.TEST is a tuple of names

    f_rgb_detections = open('nuscenes_rgb_detections.txt', "a")

    dataset = nuscenes_object.nuscenes_object(
        '/raid/datasets/extracted_nuscenes',
        split='val',
        velo_kind='lidar_top')
    if not os.path.exists(os.path.join(args.output_dir)):
        os.mkdir(os.path.join(args.output_dir))
    if not os.path.exists(os.path.join(args.output_dir, 'data')):
        os.mkdir(os.path.join(args.output_dir, 'data'))

    current_scene = 0
    current_time = 0

    for idx in range(0, len(dataset)):
        name = dataset.get_idx_name(idx)[1:]

        if current_scene == int(name[:4]) and current_time >= int(name[-4:]):
            continue
        else:
            current_scene = int(name[:4])
            current_time = int(name[-4:])

        print(name)

        ims = []
        for ii in range(0, 6):
            # print(ii)
            im = dataset.get_image_by_name(str(ii) + name)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

            predictions = predictor(im)  # DefaultPredictor returns a dict
            print(predictions)

            instances = predictions["instances"].to(torch.device("cpu"))
            #draw the detections over the original images
            visualizer = Visualizer(im, metadata)

            classes = instances.pred_classes.cpu().numpy()
            class_names = visualizer.metadata.get("thing_classes")
            # print(instances.pred_classes)
            # print(class_names)
            # print(classes)
            labels = [class_names[i] for i in classes]
            vis_colors = [
                colormap(rgb=True, maximum=1)[i] if i < 74 else (0, 0, 0)
                for i in classes
            ]

            if (view):
                visualizer.overlay_instances(
                    boxes=instances.pred_boxes,
                    # masks=instances.pred_masks,
                    labels=_create_text_labels(instances.pred_classes, \
                        instances.scores, \
                        visualizer.metadata.get("thing_classes", None)),
                        # ['Car', 'Pedestrian', 'Cyclist', 'Motorcyclist']),
                    assigned_colors=vis_colors,
                    alpha=0.5,
                )

            for jj in range(len(instances)):

                bbox = instances.pred_boxes[jj].tensor.numpy()[0]
                output_str = os.path.join(
                    dataset.image_dir, '%s.jpg' %
                    (str(ii) + name)) + " %s %f %.2f %.2f %.2f %.2f\n" % (
                        labels[jj], instances.scores[jj].cpu().numpy(),
                        bbox[0], bbox[1], bbox[2], bbox[3])

                # print(output_str)
                f_rgb_detections.write(output_str)

                det_filename = os.path.join(args.output_dir, 'data',
                                            '%s.txt' % (str(ii) + name))
                with open(det_filename, 'a+') as f:
                    bbox = instances.pred_boxes[jj].tensor.numpy()[0]
                    output_eval = '%s -1 -1 -10 %.3f %.3f %.3f %.3f -1 -1 -1 -1 -1 -1 -1 %.3f\n' %\
                                (labels[jj], bbox[0], bbox[1], bbox[2], bbox[3], instances.scores[jj].cpu().numpy())
                    f.write(output_eval)
                    # print(output_eval)

            if (view):
                im_view = np.array(visualizer.output.get_image()[:, :, ::-1])
                # im_v = cv2.rectangle(im_view, (0,0), (im_view.shape[1], im_view.shape[0]), colors[ii], thickness = 30)
                if (ii == 0):
                    ims = []
                ims.append(im_view)

        if (view):
            h1 = cv2.hconcat((ims[1], ims[0], ims[2]))
            h2 = cv2.hconcat((ims[5], ims[3], ims[4]))
            v1 = cv2.vconcat((h1, h2))

            cv2.namedWindow('6im', cv2.WINDOW_NORMAL)
            cv2.imshow('6im', v1)

        if (view):
            key = cv2.waitKey(0)

            if key == 115:  # 's' to save
                cv2.imwrite('%s_6im.png' % name, v1)
                cv2.imwrite('%s_bev_im.png' % name, bev_im)
                print('SAVING IMAGES')
            if key == 27:
                break  # esc to quit
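
The per-detection line written to det_filename above follows the KITTI label layout (type, truncation, occlusion, alpha, 2D box, 3D dimensions/location/rotation, score), with the unused 3D fields filled by -1/-10. A standalone illustration:

print('%s -1 -1 -10 %.3f %.3f %.3f %.3f -1 -1 -1 -1 -1 -1 -1 %.3f'
      % ('car', 10.0, 20.0, 110.0, 220.0, 0.97))
# -> car -1 -1 -10 10.000 20.000 110.000 220.000 -1 -1 -1 -1 -1 -1 -1 0.970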
Example No. 14
    def draw_instance_predictions_custom(self,
                                         frame,
                                         predictions,
                                         incl_boxes=True,
                                         incl_labels=True,
                                         incl_scores=True,
                                         target_alpha=None):

        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        scores = scores if incl_scores else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None)
            alpha = 0.3
        else:
            alpha = 0.5

        boxes = boxes if incl_boxes else None
        labels = labels if incl_labels else None
        alpha = alpha if target_alpha is None else target_alpha

        frame_visualizer.overlay_instances(
            boxes=None
            if masks is not None else boxes,  # boxes are a bit distracting
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output
Example No. 15
    def draw_instance_predictions(self, predictions):
        """
        :param predictions:
        :return: Besides the functions of its mother class method, this method deals with extreme points.
        """
        ext_points = predictions.ext_points if predictions.has(
            "ext_points") else None
        pred_polys = predictions.pred_polys if predictions.has(
            "pred_polys") else None
        # The parent-class fallback is intentionally disabled so the custom
        # branch below always handles extreme points and polygons.
        if False:
            return super().draw_instance_predictions(predictions)
        else:
            boxes = predictions.pred_boxes if predictions.has(
                "pred_boxes") else None
            scores = predictions.scores if predictions.has("scores") else None
            classes = predictions.pred_classes if predictions.has(
                "pred_classes") else None
            labels = _create_text_labels(
                classes, scores, self.metadata.get("thing_classes", None))
            keypoints = predictions.pred_keypoints if predictions.has(
                "pred_keypoints") else None

            if predictions.has("pred_masks"):
                masks = np.asarray(predictions.pred_masks)
                masks = [
                    GenericMask(x, self.output.height, self.output.width)
                    for x in masks
                ]
            else:
                if predictions.has("pred_polys"):
                    output_height = predictions.image_size[0]
                    output_width = predictions.image_size[1]
                    pred_masks = get_polygon_rles(
                        predictions.pred_polys.flatten(),
                        (output_height, output_width))

                    masks = np.asarray(pred_masks)
                    masks = [
                        GenericMask(x, self.output.height, self.output.width)
                        for x in masks
                    ]
                else:
                    masks = None

            path = predictions.pred_path.numpy() if predictions.has(
                "pred_path") else None

            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                    "thing_colors"):
                colors = [
                    self._jitter(
                        [x / 255 for x in self.metadata.thing_colors[c]])
                    for c in classes
                ]
                alpha = 0.8
            else:
                colors = None
                alpha = 0.5

            if self._instance_mode == ColorMode.IMAGE_BW:
                assert predictions.has(
                    "pred_masks"), "ColorMode.IMAGE_BW requires segmentations"
                self.output.img = self._create_grayscale_image(
                    (predictions.pred_masks.any(dim=0) > 0).numpy())
                alpha = 0.3

            self.overlay_instances(
                masks=masks,
                boxes=boxes,
                labels=labels,
                ext_points=ext_points,
                path=path,
                keypoints=keypoints,
                assigned_colors=colors,
                alpha=alpha,
            )
            return self.output
Example No. 16
    def draw_dataset_dict(self, dic, given_colour=None):
        """
        Draw annotations/segmentations in Detectron2 Dataset format.

        Args:
            dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.

        Returns:
            output (VisImage): image object with visualizations.
        """
        annos = dic.get("annotations", None)
        if annos:
            if "segmentation" in annos[0]:
                masks = [x["segmentation"] for x in annos]
            else:
                masks = None
            if "keypoints" in annos[0]:
                keypts = [x["keypoints"] for x in annos]
                keypts = np.array(keypts).reshape(len(annos), -1, 3)
            else:
                keypts = None

            boxes = [
                BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
                if len(x["bbox"]) == 4 else x["bbox"] for x in annos
            ]

            colors = None
            category_ids = [x["category_id"] for x in annos]
            if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                    "thing_colors"):
                colors = [
                    self._jitter(
                        [x / 255 for x in self.metadata.thing_colors[c]])
                    for c in category_ids
                ]
            names = self.metadata.get("thing_classes", None)
            labels = _create_text_labels(
                category_ids,
                scores=None,
                class_names=[
                    "Hv", "Hp", "CLS", "BL", "PD", "PB", "CC", "LM", "D/P"
                ],
                is_crowd=[x.get("iscrowd", 0) for x in annos],
            )
            # Labels and boxes are suppressed below, so only the masks and
            # keypoints are actually drawn.
            labels = None
            boxes = None
            alpha = 0
            self.overlay_instances(
                labels=labels,
                boxes=boxes,
                masks=masks,
                keypoints=keypts,
                assigned_colors=colors,
                alpha=alpha,
                given_colour=given_colour,
            )

        sem_seg = dic.get("sem_seg", None)
        if sem_seg is None and "sem_seg_file_name" in dic:
            with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
                sem_seg = Image.open(f)
                sem_seg = np.asarray(sem_seg, dtype="uint8")
        if sem_seg is not None:
            self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)

        pan_seg = dic.get("pan_seg", None)
        if pan_seg is None and "pan_seg_file_name" in dic:
            with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
                pan_seg = Image.open(f)
                pan_seg = np.asarray(pan_seg)
                from panopticapi.utils import rgb2id

                pan_seg = rgb2id(pan_seg)
        if pan_seg is not None:
            segments_info = dic["segments_info"]
            pan_seg = torch.Tensor(pan_seg)
            self.draw_panoptic_seg(pan_seg,
                                   segments_info,
                                   area_threshold=0,
                                   alpha=0.5)
        return self.output
Example No. 17
def sbd_pred(cv_img):
    '''
    print("sbd prediction start........")
    cv_img = test_imge_generator()
    start_time = time.time()
    print(settings.systemID, settings.prediction_model)
    opt = get_args()

    print("1")
    scores, classes, boxes = efficientDet_pred(cv_img, opt)
    print("2")
    for box_id in range(boxes.shape[0]):
        pred_prob = float(scores[box_id])
        if pred_prob < opt.cls_threshold:
            break
        pred_label = int(classes[box_id])
        xmin, ymin, xmax, ymax = boxes[box_id, :]
        color = colors[pred_label]
        cv2.rectangle(cv_img, (xmin, ymin), (xmax, ymax), color, 2)
        text_size = cv2.getTextSize(COCO_CLASSES[pred_label] + ' : %.3f' % pred_prob, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        cv2.rectangle(cv_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
        cv2.putText(
            cv_img, COCO_CLASSES[pred_label] + ' : %.3f' % pred_prob,
            (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1,
            (255, 255, 255), 1)
    # cv2.imwrite(os.path.join('demo', "demo_result.jpg"), cv_img)
    print("3")


    res1.processingTime = int((stop_time - start_time) * 1000)

    # cv2.destroyAllWindows
    print("total time:", stop_time - start_time)
    return res1
    '''

    # mask rcnn
    obj_predictions, visualized_output = demo.run_on_image(cv_img)
    predictions = obj_predictions["instances"].to("cpu")
    img_result = visualized_output.get_image()[:, :, ::-1]
    
    
    boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes if predictions.has("pred_classes") else None
    labels = _create_text_labels(classes, scores, melbourne_metadata.get("thing_classes", None))
    print("classes.shape, labels",classes.shape, labels)
    # a = torch.from_numpy(classes)
    # print(a)

    objects = []
    # response must exist before the loop below appends to its flags
    response = AnalysisResponse()
    # COCO_CLASSES
    if classes is not None:
        for i in range(len(classes)):
            if scores[i] > 0.9:
                label = labels[i].split(" ")[0]
                objects.append(label)
                print(label)
                if label == "suitcase":
                    print("suitcase detected")
                elif label == "tray":
                    print("tray")            
                elif label == "soft_bag":
                    print("soft bag")             
                elif label == "extended_handle":
                    response.flags.append(0)                     

    # Count by substring ('tray' in x) rather than objects.count("tray") so
    # suffixed label variants are still matched.
    num_tray = sum(['tray' in x for x in objects])
    num_suitcase = sum(['suitcase' in x for x in objects])
    num_soft_bag = sum(['soft_bag' in x for x in objects])

    if (num_suitcase + num_soft_bag) > 1: # multi bags
        response.flags.append(1)
    
    if num_tray > 1:
        response.result = 2  # TubDetected
    elif num_soft_bag > 1 and num_tray == 0:
        response.result = 3  # TubRequired
    elif num_suitcase > 0:
        response.result = 1  # NoTubRequired
    else:
        response.result = 0
    print(response.result, response.flags)

    cv2.imwrite("image_sbd.jpg", img_result)

    return response
Example No. 18
    def run_on_image(self, image, debug):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.
            debug (bool): when True, draw the instance predictions; when
                False, collect per-instance data into the returned obj dict.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
            obj (dict): per-instance class/score/box/polygon data, filled only
                when debug is False and instances are present.
        """
        vis_output = None
        obj = None
        predictions = self.predictor(image.astype(np.uint8))
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
            if debug:
                print('in panoptic_seg')
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
                if debug:
                    print("in sem_seg")
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)

                if debug:
                    vis_output = visualizer.draw_instance_predictions(
                        predictions=instances)
                    print('in instances')

                # if the output is JSON, debug is False
                if not debug:
                    boxes = instances.pred_boxes.tensor.numpy(
                    ) if instances.has("pred_boxes") else None
                    scores = instances.scores if instances.has(
                        'scores') else None
                    classes = instances.pred_classes if instances.has(
                        "pred_classes") else None
                    labels = _create_text_labels(
                        classes, scores,
                        visualizer.metadata.get("thing_classes", None))
                    keypoints = instances.pred_keypoints if instances.has(
                        "pred_keypoints") else None

                    if instances.has("pred_masks"):
                        masks = np.asarray(instances.pred_masks)
                        masks = [
                            GenericMask(x, visualizer.output.height,
                                        visualizer.output.width) for x in masks
                        ]
                    else:
                        masks = None

                    obj = {}

                    for i, _ in enumerate(labels):
                        tmp = {}
                        split = labels[i].split()
                        tmp['class'] = split[0]
                        tmp['score'] = scores[i].item()
                        tmp['box'] = {}
                        tmp['box']['left-up'] = [
                            boxes[i][0].item(), boxes[i][1].item()
                        ]
                        tmp['box']['right-down'] = [
                            boxes[i][2].item(), boxes[i][3].item()
                        ]
                        tmp['polygons'] = {}

                        if masks is not None:
                            for idx, segment in enumerate(masks[i].polygons):
                                tmp['polygons'][idx] = segment.reshape(
                                    -1, 2).tolist()

                        obj[i] = tmp

        return predictions, vis_output, obj
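
A sketch of consuming the third return value as JSON; here demo and the image path are placeholders for whatever wraps this class:

import json
import cv2

image = cv2.imread("example.jpg")  # BGR, as the docstring requires
predictions, vis_output, obj = demo.run_on_image(image, debug=False)
if obj is not None:
    with open("detections.json", "w") as f:
        json.dump(obj, f, indent=2)  # per-instance class/score/box/polygons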
Example No. 19
    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)

        # MOONLITE: zero out frame
        frame_visualizer.output.img = np.zeros(
            frame_visualizer.output.img.shape)

        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None)
            alpha = 0.3
        else:
            alpha = 0.5

        # Only keep instances of the focused class.
        class_names = self.metadata.get("thing_classes", None)
        focused_class = "person"
        # indices of all instances whose class matches the focused class
        num_class_instances = [
            i for i in range(num_instances)
            if (class_names[classes[i]] == focused_class)
        ]

        # strip instances down to only instances of our focused class
        boxes = [boxes[i] for i in num_class_instances] if boxes is not None else None
        masks = [masks[i] for i in num_class_instances] if masks is not None else None
        labels = ["" for _ in num_class_instances]
        keypoints = ([keypoints[i] for i in num_class_instances]
                     if keypoints is not None else None)
        colors = [(scores[i], scores[i], scores[i])
                  for i in num_class_instances]

        #for i in range(num_instances):
        #    if class_names[classes[i]] == "person":
        #        colors[i] = (scores[i],scores[i],scores[i])
        #    else:
        #        colors[i] = (0.0,0.0,0.0)
        #
        #    labels[i] = ""

        alpha = 1.0

        frame_visualizer.overlay_instances(
            boxes=None
            if masks is not None else boxes,  # boxes are a bit distracting
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output
Example No. 20
    def new_draw_instance_predictions(self, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            predictions (Instances): the output of an instance detection/segmentation
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        boxes = predictions.pred_boxes if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes if predictions.has(
            "pred_classes") else None
        labels = _create_text_labels(
            classes, scores,
            CLASS_NAMES)  #self.metadata.get("thing_classes", None))
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        # if predictions.has("pred_masks"):
        #     masks = np.asarray(predictions.pred_masks)
        #     masks = [GenericMask(x, self.output.height, self.output.width) for x in masks]
        # else:
        masks = None

        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                "thing_colors"):
            colors = [
                self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
                for c in classes
            ]
            alpha = 0.8
        else:
            colors = None
            alpha = 0.5

        if self._instance_mode == ColorMode.IMAGE_BW:
            self.output.img = self._create_grayscale_image(None)
            # (predictions.pred_masks.any(dim=0) > 0).numpy()
            # if predictions.has("pred_masks")
            # else None

            alpha = 0.3
        print(labels)
        self.overlay_instances(
            masks=masks,
            boxes=boxes,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )
        return self.output


# args = parse_args()
# cfg = modify_cfg(args) #use the same config as training
# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
# predictor = DefaultPredictor(cfg)

# # facemask_1_metadata = MetadataCatalog.get("facemask_1_val")
# facemask_1_metadata, dataset_dicts = register_facemask_dataset(split='val')

# # evaluator = COCOEvaluator("facemask_1_val", ("bbox", "segm"), False, output_dir="./output/")
# # val_loader = build_detection_test_loader(cfg, "facemask_1_val")
# # print(inference_on_dataset(predictor, val_loader, evaluator))
# # # another equivalent way to evaluate the model is to use `trainer.test`

# def get_iou(pred_box, gt_box):
#     """
#     pred_box : the coordinate for predict bounding box
#     gt_box :   the coordinate for ground truth bounding box
#     return :   the iou score
#     the  left-down coordinate of  pred_box:(pred_box[0], pred_box[1])
#     the  right-up coordinate of  pred_box:(pred_box[2], pred_box[3])
#     """
#     # 1.get the coordinate of inters
#     ixmin = max(pred_box[0], gt_box[0])
#     ixmax = min(pred_box[2], gt_box[2])
#     iymin = max(pred_box[1], gt_box[1])
#     iymax = min(pred_box[3], gt_box[3])

#     iw = np.maximum(ixmax-ixmin+1., 0.)
#     ih = np.maximum(iymax-iymin+1., 0.)

#     # 2. calculate the area of inters
#     inters = iw*ih

#     # 3. calculate the area of union
#     uni = ((pred_box[2]-pred_box[0]+1.) * (pred_box[3]-pred_box[1]+1.) +
#            (gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) -
#            inters)

#     # 4. calculate the overlaps between pred_box and gt_box
#     iou = inters / uni

#     return iou

# correct = 0
# total = 0
# count = 0
# for d in dataset_dicts:
#     if count % 100 == 0:
#         print(count, '/', len(dataset_dicts))
#     count += 1
#     im = cv2.imread(d["file_name"])
#     outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
#     # print(outputs["instances"].pred_classes.detach().cpu().numpy())
#     # print([dict['category_id'] for dict in d["annotations"]])
#     pred = np.array(outputs["instances"].pred_classes.detach().cpu().numpy())
#     ground_truth = np.array([dict['category_id'] for dict in d["annotations"]])

#     ml = min(len(pred), len(ground_truth))
#     diff = pred[:ml] - ground_truth[:ml]
#     correct += len(ground_truth) - len(np.where(diff>0)[0])
#     total += len(ground_truth)
#     print(outputs["instances"].pred_boxes)
#     print(dict)

# print(correct, total)
# print('total class accuracy: ', correct/total)

# # #randomly select 5 images to visualize
# # for d in random.sample(dataset_dicts, 5):
# #     im = cv2.imread(d["file_name"])
# #     import time
# #     start = time.time()
# #     outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
# #     print('used', time.time() - start, 'sec')
# #     v = newVisualizer(im[:, :, ::-1],
# #                    metadata=facemask_1_metadata,
# #                    scale=0.5,
# #                    instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
# #     )
# #     out = v.new_draw_instance_predictions(outputs["instances"].to("cpu"))
# #     # cv2_imshow(out.get_image()[:, :, ::-1])
# #     print(out)
# #     plt.imshow(out.get_image()[:, :, ::-1])
# #     plt.show()
Example No. 21
    ppl = VisualizationDemo(cfg)

    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            assert args.input, "The input path(s) was not found"
        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            start_time = time.time()
            predictions, visualized_output = ppl.run_on_image(img)
            num_instances = len(predictions["instances"])
            if num_instances > 0:
                classes = predictions["instances"].pred_classes
                labels = _create_text_labels(
                    classes, predictions["instances"].scores,
                    ppl.metadata.get("thing_classes", None))
                print(labels)
                matchers = ['person']
                matching = [
                    s for s in labels if any(xs in s for xs in matchers)
                ]
                if len(matching) > 0:
                    print("Person detected!")
                    payload = ":"
                    payload = payload.join(matching)
                    payload += " From "
                    payload += args.input[0]
                    publish.single("cameras/person",
                                   payload,
                                   hostname="192.168.1.20")
Example No. 22
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                    #Get class names from dataset train metadata to put on visualization.
                    metadata=my_dataset_train_metadata, 
                    scale=1
                    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # As described in the draw_instance_predictions function of Visualizer, we can pull per-image data.
    # We might need these later, but for now we are only interested in the labels,
    # since they contain the predicted class and prediction score for each detection.
    imagePredClasses = outputs["instances"].pred_classes
    imagePredBoxes = outputs["instances"].pred_boxes
    imagePredScores = outputs["instances"].scores

    # Initialise a dictionary and store the inference data (labels with class name and prediction score) for each image.
    imageDataArray = {imageBaseName: []}
    imageDataArray[imageBaseName] = _create_text_labels(outputs["instances"].pred_classes, outputs["instances"].scores, my_dataset_train_metadata.get("thing_classes", None))
    print(imageDataArray)

    # As the loop runs over all images, this dictionary accumulates the data needed for the JSON output.

    # To show images with predictions in a system window (when not running on a remote host), uncomment:
    #cv2.imshow('Inference Preview',out.get_image()[:, :, ::-1])
    # To keep the preview open with a timer when running inference locally, uncomment:
    #cv2.waitKey(10000)
    # Save images with predictions to the savePath folder, using the image's base name (path stripped).
    savePath = './inferenceContent/output'
    cv2.imwrite(os.path.join(savePath , imageBaseName), out.get_image()[:, :, ::-1])
    
    # Create a JSON object and append results for each image.
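    # One way to finish the step described above (a hedged sketch): write this
    # image's labels next to the saved visualization. Appending to one combined
    # JSON across the loop would need a dict created before the loop.
    import json
    with open(os.path.join(savePath, imageBaseName + '.json'), 'w') as f:
        json.dump(imageDataArray, f, indent=2)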

Example No. 23
    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. The following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None
        colors = predictions.COLOR if predictions.has(
            "COLOR") else [None] * len(predictions)
        durations = predictions.ID_duration if predictions.has(
            "ID_duration") else None
        duration_threshold = self.metadata.get("duration_threshold", 0)
        visibilities = [True] * len(predictions) if durations is None else [
            x > duration_threshold for x in durations
        ]

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=colors[i],
                              ttl=8) for i in range(num_instances)
        ]
        if not predictions.has("COLOR"):
            colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.reset_image(
                frame_visualizer._create_grayscale_image((masks.any(
                    dim=0) > 0).numpy() if masks is not None else None))
            alpha = 0.3
        else:
            alpha = 0.5

        labels = (
            None if labels is None else
            [y[0] for y in filter(lambda x: x[1], zip(labels, visibilities))]
        )  # noqa
        assigned_colors = (
            None if colors is None else
            [y[0] for y in filter(lambda x: x[1], zip(colors, visibilities))]
        )  # noqa
        frame_visualizer.overlay_instances(
            boxes=None if masks is not None else
            boxes[visibilities],  # boxes are a bit distracting
            masks=None if masks is None else masks[visibilities],
            labels=labels,
            keypoints=None if keypoints is None else keypoints[visibilities],
            assigned_colors=assigned_colors,
            alpha=alpha,
        )

        return frame_visualizer.output
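
This tracking-aware variant reads the optional COLOR and ID_duration fields. A hedged sketch of attaching them before calling it, since Instances accepts arbitrary per-instance fields (instances and metadata stand for objects created elsewhere):

instances.COLOR = [(0.2, 0.8, 0.2)] * len(instances)  # one RGB tuple per instance
instances.ID_duration = [12] * len(instances)         # frames each track ID has persisted
metadata.set(duration_threshold=5)                    # hide tracks shorter than this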
Example No. 24
                if not color_frame:
                    continue
                # Convert images to numpy arrays
                img = np.asanyarray(color_frame.get_data())
                start_time = time.time()
                obj_predictions, visualized_output = demo.run_on_image(img)
                predictions = obj_predictions["instances"].to("cpu")

                boxes = predictions.pred_boxes if predictions.has(
                    "pred_boxes") else None
                scores = predictions.scores if predictions.has(
                    "scores") else None
                classes = predictions.pred_classes if predictions.has(
                    "pred_classes") else None
                labels = _create_text_labels(
                    classes, scores,
                    melbourne_metadata.get("thing_classes", None))

                cv2.imshow("front camera",
                           visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(1) == 27:
                    break  # esc to quit

                # TODO
                # event_list, pose = pose_dim_estimation(portrait,profile,aligned_depth_frame, color_frame,
                #                        depth_intrin,depth_to_color_extrin,width,height)
                event_list = ["Wheel at front", "The bag is upright"]

                from detectron2.event_output import output_json
                # mock image
                image_file = "/home/don/code/BagAnalysis/3DImaging/GPU_based_solution/Deeplearning/detectron2/detectron2/data/melbourne/train_melb_mask/img_860.jpg"