def predict(self, obj, mode: str = "image"):
    """Run inference on a single image or an opened cv2.VideoCapture."""
    if mode == "image":
        # Detectron2 visualizers expect RGB; OpenCV delivers BGR.
        image = obj[:, :, ::-1]
        image_visualizer = Visualizer(image,
                                      metadata=self.metadata,
                                      instance_mode=self.instance_mode,
                                      scale=1.2)
        outputs = self.predictor(obj)
        instances = outputs["instances"].to("cpu")
        instances.remove("pred_classes")  # hide class labels in the drawing
        vis_output = image_visualizer.draw_instance_predictions(instances)
    elif mode == "video":
        video_visualizer = VideoVisualizer(metadata=self.metadata,
                                           instance_mode=self.instance_mode)
        outputs, vis_output = [], []
        while obj.isOpened():
            success, frame = obj.read()
            if not success:
                break
            output = self.predictor(frame)
            outputs.append(output)
            instances = output["instances"].to("cpu")
            # Convert BGR to RGB for drawing, then back to BGR for OpenCV.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            vis_frame = video_visualizer.draw_instance_predictions(frame, instances)
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            vis_output.append(vis_frame)
    return outputs, vis_output
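# A standalone sketch of the "video" branch above, assuming a detectron2
# model-zoo config (the config name and video path are examples, not taken
# from the original snippet):
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
visualizer = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))

cap = cv2.VideoCapture("input.mp4")
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    instances = predictor(frame)["instances"].to("cpu")
    vis = visualizer.draw_instance_predictions(frame[:, :, ::-1], instances)
    cv2.imshow("prediction", vis.get_image()[:, :, ::-1])
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()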
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image
        self.annotate_frame_num(data)
        self.annotate_predictions(data)
        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]
        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return

        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))

        # Convert RGB back to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image
def prediction_on_video(video):
    model = "modelsfiles/model_final.pth"
    config = "modelsfiles/config.yml"
    threshold = 0.5
    save_path = "output"
    predictor, cfg = get_model(model, config, threshold)

    parser = argparse.ArgumentParser(
        description='Detect objects from webcam images')
    parser.add_argument('-s', '--show', default=True, action="store_false",
                        help='Disable display (output is shown by default)')
    parser.add_argument('-sp', '--save_path', type=str, default='',
                        help="Path to save the output. If empty, output won't be saved")
    args = parser.parse_args()
    print("Started")

    video_file = video  # e.g. "/home/oem/Downloads/video.mp4"
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print("Error opening video stream or file")
        return

    MetadataCatalog.get("customtrain").thing_classes = [
        'ear plugs', 'welding shield'
    ]
    metadata = MetadataCatalog.get("customtrain")

    # Create the visualizer once so instance colors stay consistent across frames.
    video_visualizer = VideoVisualizer(metadata, ColorMode.IMAGE)

    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            break
        outputs = predictor(image)
        # VideoVisualizer expects an RGB image; flip back to BGR for display.
        v = video_visualizer.draw_instance_predictions(
            image[:, :, ::-1], outputs["instances"].to("cpu"))
        if args.show:
            cv2.imshow('object_detection', v.get_image()[:, :, ::-1])
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
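# get_model() is called above but not defined in this snippet. A minimal
# sketch of what it plausibly does, assuming a standard detectron2 setup (only
# the signature is taken from the call site; the body is an assumption):
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

def get_model(model_path, config_path, threshold):
    cfg = get_cfg()
    cfg.merge_from_file(config_path)
    cfg.MODEL.WEIGHTS = model_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold  # drop low-confidence detections
    return DefaultPredictor(cfg), cfg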
def main():
    args = parse_args()

    with open(args.config, "r") as f:
        config = yaml.safe_load(f)
    if "classes" not in config:
        raise Exception("Could not find class names")
    classes = config["classes"]
    n_classes = len(classes)

    cfg = get_cfg()
    cfg.merge_from_file(args.model_config)
    cfg.DATASETS.TRAIN = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 50000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = n_classes
    if args.model_weights is None:
        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    else:
        cfg.MODEL.WEIGHTS = args.model_weights
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("custom_test",)
    predictor = DefaultPredictor(cfg)

    # Dummy registration so the metadata catalog has an entry to attach class names to.
    DatasetCatalog.register("custom_test", lambda d="test": None)
    MetadataCatalog.get("custom_test").set(thing_classes=classes)
    custom_metadata = MetadataCatalog.get("custom_test")

    os.makedirs(args.output, exist_ok=True)

    cap = cv2.VideoCapture(args.video)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    vis = VideoVisualizer(metadata=custom_metadata)
    for i in tqdm.tqdm(range(0, n_frames, args.skip_frames)):
        assert cap.isOpened()
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, image = cap.read()
        assert success
        outputs = predictor(image)
        v = vis.draw_instance_predictions(
            image[:, :, ::-1], outputs["instances"].to("cpu"))
        filename = os.path.join(args.output, "prediction_%09d.jpg" % i)
        cv2.imwrite(filename, v.get_image()[:, :, ::-1])
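# parse_args() is not shown above. A minimal sketch, assuming argparse and
# covering only the attributes main() actually reads (flag names are guesses):
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Run inference on a video")
    parser.add_argument("--config", required=True, help="YAML file listing class names")
    parser.add_argument("--model-config", required=True, help="detectron2 model config file")
    parser.add_argument("--model-weights", default=None, help="defaults to OUTPUT_DIR/model_final.pth")
    parser.add_argument("--video", required=True, help="input video file")
    parser.add_argument("--output", default="output", help="directory for prediction images")
    parser.add_argument("--skip-frames", type=int, default=1, help="process every Nth frame")
    return parser.parse_args()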
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE,
                 frame_num=True, predictions=True, pose_flows=True):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.frame_num = frame_num
        self.predictions = predictions
        self.pose_flows = pose_flows
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image
        if self.frame_num:
            self.annotate_frame_num(data)
        if self.predictions:
            self.annotate_predictions(data)
        if self.pose_flows:
            self.annotate_pose_flows(data)
        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]
        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return

        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))

        # Convert RGB back to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image

    def annotate_pose_flows(self, data):
        if "pose_flows" not in data:
            return

        predictions = data["predictions"]
        instances = predictions["instances"]
        keypoints = instances.pred_keypoints.cpu().numpy()

        # COCO keypoint skeleton: pairs of keypoint indices joined by a limb.
        l_pairs = [
            (0, 1), (0, 2), (1, 3), (2, 4),  # Head
            (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
            (6, 12), (5, 11), (11, 12),      # Body
            (11, 13), (12, 14), (13, 15), (14, 16)
        ]

        dst_image = data[self.dst]
        height, width = dst_image.shape[:2]

        pose_flows = data["pose_flows"]
        pose_colors = list(colors.items())
        pose_colors_len = len(pose_colors)
        for idx, pose_flow in enumerate(pose_flows):
            pid = pose_flow["pid"]
            # Map the track id onto the palette; the double modulo keeps the
            # index non-negative for negative pids.
            pose_color_idx = ((pid * 10) % pose_colors_len + pose_colors_len) % pose_colors_len
            pose_color_bgr = pose_colors[pose_color_idx][1].to_bgr()
            (start_x, start_y, end_x, end_y) = pose_flow["box"].astype("int")
            cv2.rectangle(dst_image, (start_x, start_y), (end_x, end_y),
                          pose_color_bgr, 2, cv2.LINE_AA)
            put_text(dst_image, f"{pid:d}", (start_x, start_y),
                     color=pose_color_bgr,
                     bg_color=colors.get("black").to_bgr(),
                     org_pos="tl")

            instance_keypoints = keypoints[idx]
            l_points = {}
            p_scores = {}
            # Draw keypoints
            for n in range(instance_keypoints.shape[0]):
                score = instance_keypoints[n, 2]
                if score <= 0.05:
                    continue
                cor_x = int(np.clip(instance_keypoints[n, 0], 0, width))
                cor_y = int(np.clip(instance_keypoints[n, 1], 0, height))
                l_points[n] = (cor_x, cor_y)
                p_scores[n] = score
                cv2.circle(dst_image, (cor_x, cor_y), 2, pose_color_bgr, -1)

            # Draw limbs, thicker where both endpoint scores are high
            for i, (start_p, end_p) in enumerate(l_pairs):
                if start_p in l_points and end_p in l_points:
                    start_xy = l_points[start_p]
                    end_xy = l_points[end_p]
                    start_score = p_scores[start_p]
                    end_score = p_scores[end_p]
                    cv2.line(dst_image, start_xy, end_xy, pose_color_bgr,
                             int(2 * (start_score + end_score) + 1))
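# A minimal sketch of running AnnotateVideo on a single frame outside a full
# pipeline, assuming a registered metadata name and a detectron2
# DefaultPredictor (both names here are examples, not from the original):
annotate = AnnotateVideo("vis_image", "coco_2017_train")
frame = cv2.imread("frame.jpg")
data = {
    "image": frame,                   # BGR ndarray from OpenCV
    "frame_num": 0,
    "predictions": predictor(frame),  # {"instances": Instances}
    # no "pose_flows" key, so annotate_pose_flows() returns early
}
data = annotate.map(data)
cv2.imshow("annotated", data["vis_image"])
cv2.waitKey(0)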
        frame = cv2.imread(jpg)
        visualised_jpg_path = os.path.join(args.output, 'detection', image_basename)
        assert not os.path.isfile(visualised_jpg_path), visualised_jpg_path

        predictions = predictor(frame)["instances"].to("cpu")
        output_dict = {
            'num_detections': len(predictions),
            'detection_boxes': predictions.pred_boxes.tensor.numpy(),
            'detection_classes': predictions.pred_classes.numpy(),
            'detection_score': predictions.scores.numpy()
        }
        all_detection_outputs[frame_num] = output_dict

        # Draw on RGB, then flip back to BGR for cv2.imwrite.
        vis_frame = video_visualiser.draw_instance_predictions(
            frame[:, :, ::-1], predictions)
        cv2.imwrite(visualised_jpg_path, vis_frame.get_image()[:, :, ::-1])

    with open(predictions_save_path, 'wb') as handle:
        pickle.dump(all_detection_outputs, handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

elif args.video_input:
    demo = VisualizationDemo(cfg)
    video = cv2.VideoCapture(args.video_input)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
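    # The properties read above are typically fed straight into the output
    # writer; a minimal sketch (file name and codec are assumptions, not from
    # the original):
    output_writer = cv2.VideoWriter(
        "visualized.mp4",
        fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
        fps=frames_per_second,
        frameSize=(width, height),
        isColor=True,
    )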
isColor=True, )""" while (cap.isOpened()): ret, frame = cap.read(0) frame = cv2.resize(frame, (224, 224)) print(fps) print(num_frames) try: outputs = predictor(frame) #v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0])) v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), instance_mode=ColorMode.IMAGE_BW) v = v.draw_instance_predictions(frame, outputs["instances"].to('cpu')) print(outputs["instances"].pred_classes) omt = str(outputs["instances"].pred_classes) outpclass = omt[8:9] print(outpclass) """while (cap.isOpened()): #outpclass is printing ang giving 0 if 0 comes then action this loop if outpclass == '0': #unlock(8) make ur own function to test time.sleep(10) #Lock will remains open for 10 seconds. make this run in loop #lock(8) #GPIO.cleanup(8)""" #out.write(v.get_image()) #cv2_imshow("Moda", v.get_image()) except: break
output = predictor(frame)
try:
    # Get the first detected person (COCO class id 0)
    print(output["instances"].pred_boxes)
    classes = output["instances"].pred_classes.cpu().numpy()
    pos = np.where(classes == 0)[0][0]  # raises IndexError if no person found
    v = VideoVisualizer(
        metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
        instance_mode=ColorMode.IMAGE,
    )
    v = v.draw_instance_predictions(
        frame, output["instances"][int(pos)].to("cpu"))
    box = output["instances"][int(pos)].pred_boxes
    startX, startY, endX, endY = box.tensor.cpu().numpy().astype(
        "int").tolist()[0]
    detected_person = frame[startY:endY, startX:endX]
    cv2.imshow("images", v.get_image()[:, :, ::-1])
except IndexError:
    # No person in this frame; show the raw frame instead of a stale
    # visualization (the original bare except reused a possibly unbound v).
    cv2.imshow("images", frame)
# curr_inference_time = toc - tic
# inference_time_cma = (n * inference_time_cma + curr_inference_time) / (n + 1)
# print('cma inference time: {:0.3} sec'.format(inference_time_cma))

# tic2 = time.time()
drawned_frame = frame.copy()  # make a copy of the original frame

# Draw the predictions on the frame copy.
# v = Visualizer(drawned_frame[:, :, ::-1],
#                metadata=plastic_metadata,
#                scale=0.8,
#                instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
#                )
v_out = viz.draw_instance_predictions(drawned_frame, res["instances"].to("cpu"))
# v_out = viz.draw_instance_predictions(drawned_frame[:, :, ::-1], res["instances"].to("cpu"))
drawned_frame = v_out.get_image()

cv2.imshow(win_name, drawned_frame)
# toc2 = time.time()
vw.write(drawned_frame)

# curr_drawing_time = toc2 - tic2
# drawing_time_cma = (n * drawing_time_cma + curr_drawing_time) / (n + 1)
# print('cma draw time: {:0.3} sec'.format(drawing_time_cma))

if cv2.waitKey(1) & 0xff == ord('q'):
    break