Example #1
    def predict(self, obj, mode: str = "image"):
        # Make prediction
        if mode == "image":
            image = obj[:, :, ::-1]
            image_visualizer = Visualizer(image,
                                          metadata=self.metadata,
                                          instance_mode=self.instance_mode,
                                          scale=1.2)
            outputs = self.predictor(obj)
            instances = outputs["instances"].to("cpu")
            instances.remove('pred_classes')
            vis_output = image_visualizer.draw_instance_predictions(instances)
        elif mode == "video":
            video_visualizer = VideoVisualizer(
                metadata=self.metadata, instance_mode=self.instance_mode)
            outputs, vis_output = [], []
            while obj.isOpened():
                success, frame = obj.read()
                if success:
                    output = self.predictor(frame)
                    outputs.append(output)
                    instances = output["instances"].to("cpu")

                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                    vis_frame = video_visualizer.draw_instance_predictions(
                        frame, instances)
                    vis_frame = cv2.cvtColor(vis_frame.get_image(),
                                             cv2.COLOR_RGB2BGR)

                    vis_output.append(vis_frame)
                else:
                    break

        return outputs, vis_output
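A minimal usage sketch for the predict method above. The enclosing class is not shown in this example, so the detector instance below is only a placeholder assumption:

import cv2

# detector = ...  # hypothetical instance of the class defining predict(); its constructor is not shown above

# Image mode: obj is a BGR array as returned by cv2.imread; vis_output is a detectron2 VisImage.
image = cv2.imread("input.jpg")
outputs, vis_output = detector.predict(image, mode="image")
cv2.imwrite("prediction.jpg", vis_output.get_image()[:, :, ::-1])  # RGB -> BGR for imwrite

# Video mode: obj is a cv2.VideoCapture; vis_output is a list of BGR frames ready for imshow/VideoWriter.
cap = cv2.VideoCapture("input.mp4")
outputs, vis_frames = detector.predict(cap, mode="video")
cap.release()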
Example #2
    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        super().__init__()
Example #3
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image

        self.annotate_frame_num(data)
        self.annotate_predictions(data)

        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]

        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return

        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]

        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(dst_image,
                                                                            panoptic_seg.to(self.cpu_device),
                                                                            segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(dst_image,
                                                           sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(dst_image,
                                                                        instances.to(self.cpu_device))

        # Converts RGB format to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image
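A rough sketch of the data dictionary this pipeline task consumes, assuming the predictions come from a detectron2 DefaultPredictor and that the surrounding Pipeline machinery (not shown here) passes each frame through map(); the model config used below is only an assumption:

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

cap = cv2.VideoCapture("input.mp4")
ok, frame = cap.read()

annotate = AnnotateVideo(dst="annotated_image", metadata_name="coco_2017_train")
data = {
    "image": frame,                   # BGR frame from OpenCV
    "frame_num": 0,
    "predictions": predictor(frame),
}
data = annotate.map(data)
annotated_bgr = data["annotated_image"]  # frame number and predictions drawn onto a copy of the frame
cap.release()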
Example #4
def prediction_on_video(video):
    model = "modelsfiles/model_final.pth"
    config = "modelsfiles/config.yml"
    threshold = 0.5
    save_path = "output"
    predictor, cfg = get_model(model, config, threshold)
    parser = argparse.ArgumentParser(
        description='Detect objects from webcam images')
    parser.add_argument('-s',
                        '--show',
                        default=True,
                        action="store_false",
                        help='Show output')
    parser.add_argument(
        '-sp',
        '--save_path',
        type=str,
        default='',
        help='Path to save the output. If None output won\'t be saved')
    args = parser.parse_args()
    print("Started")
    video_file = video  #"/home/oem/Downloads/video.mp4"
    cap = cv2.VideoCapture(video_file)

    if not cap.isOpened():
        print("Error opening video stream or file")
    MetadataCatalog.get("customtrain").thing_classes = [
        'ear plugs', 'welding shield'
    ]
    metadata = MetadataCatalog.get("customtrain")

    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            break

        outputs = predictor(image)

        #v = Visualizer(image[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
        #VideoVisualizer
        #v = Visualizer(image[:, :, ::-1], metadata, scale=1.2)

        video_visualizer = VideoVisualizer(metadata, ColorMode.IMAGE)
        v = video_visualizer.draw_instance_predictions(
            image, outputs["instances"].to("cpu"))
        #v = v.draw_instance_predictions(outputs["instances"].to("cpu"))

        if args.show:
            ui_main_window = Ui_MainWindow()
            ui_main_window.displayImage(
                cv2.imshow('object_detection',
                           v.get_image()[:, :, ::-1]))
            #cv2.imshow('object_detection', v.get_image()[:, :, ::-1])
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
Example #5
def main():
    args = parse_args()

    with open(args.config, "r") as f:
        config = yaml.safe_load(f)
    if "classes" not in config:
        raise Exception("Could not find class names")
    n_classes = len(config["classes"])
    classes = config["classes"]

    cfg = get_cfg()
    cfg.merge_from_file(args.model_config)
    cfg.DATASETS.TRAIN = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 50000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)
    if args.model_weights is None:
        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    else:
        cfg.MODEL.WEIGHTS = args.model_weights
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set the testing threshold for this model
    cfg.DATASETS.TEST = ("custom_test",)

    predictor = DefaultPredictor(cfg)

    DatasetCatalog.register("custom_test", lambda d="test": None)
    MetadataCatalog.get("custom_test").set(thing_classes=classes)
    custom_metadata = MetadataCatalog.get("custom_test")

    os.makedirs(args.output, exist_ok=True)

    cap = cv2.VideoCapture(args.video)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    vis = VideoVisualizer(metadata=custom_metadata)
    for i in tqdm.tqdm(range(0, n_frames, args.skip_frames)):
        assert cap.isOpened()
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, image = cap.read()
        assert success
        outputs = predictor(image)
        v = vis.draw_instance_predictions(
            image[:, :, ::-1], outputs["instances"].to("cpu"))
        filename = os.path.join(args.output, "prediction_%09d.jpg" % i)
        cv2.imwrite(filename, v.get_image()[:, :, ::-1])
    cap.release()
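parse_args() is not included in this example. A plausible reconstruction, assuming only the attribute names that main() actually reads (config, model_config, model_weights, video, output, skip_frames):

import argparse

def parse_args():
    # Hypothetical sketch; argument names are inferred from their use in main() above.
    parser = argparse.ArgumentParser(description="Visualize predictions on a video")
    parser.add_argument("--config", required=True, help="YAML file containing the class names")
    parser.add_argument("--model-config", dest="model_config", required=True, help="detectron2 model config file")
    parser.add_argument("--model-weights", dest="model_weights", default=None, help="path to model weights (.pth)")
    parser.add_argument("--video", required=True, help="input video file")
    parser.add_argument("--output", default="output", help="directory for the saved frames")
    parser.add_argument("--skip-frames", dest="skip_frames", type=int, default=1, help="process every N-th frame")
    return parser.parse_args()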
Example #6
def run_on_video(video, cfg):
    """
    Visualizes predictions on frames of the input video.
    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.
    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    predictor = DefaultPredictor(cfg)
    metadata = MetadataCatalog.get("__unused")
    video_visualizer = VideoVisualizer(metadata)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        predictions = predictions["instances"].to('cpu')
        vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)

        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    def frame_from_video(video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    frame_gen = frame_from_video(video)
    for frame in frame_gen:
        frame = np.array(frame)
        yield process_predictions(frame, predictor(frame))
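One way to drive the generator above and write the visualized frames to a new file; the config setup below is only an assumption (any detectron2 model config would do):

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

video = cv2.VideoCapture("input.mp4")
fps = video.get(cv2.CAP_PROP_FPS)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

writer = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
for vis_frame in run_on_video(video, cfg):
    writer.write(vis_frame)  # frames yielded by run_on_video are already BGR
video.release()
writer.release()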
Example #7
    def run(self, video):
        video_visualizer = VideoVisualizer(self.metadata, ColorMode.IMAGE)

        def process_predictions(frame, predictions):
            predictions = predictions["instances"].to(self.cpu_device)

            vis_frame = video_visualizer.draw_instance_predictions(
                frame, predictions)
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

            return vis_frame, predictions

        frame_gen = self._frame_from_video(video)

        if self.parallel:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()

            for cnt, (frame, frame_pos) in enumerate(frame_gen):
                frame_data.append([frame, frame_pos])
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame, frame_pos = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield frame_pos, process_predictions(frame, predictions)

            while len(frame_data):
                frame, frame_pos = frame_data.popleft()
                predictions = self.predictor.get()
                yield frame_pos, process_predictions(frame, predictions)
        else:
            for frame, frame_pos in frame_gen:
                yield frame_pos, process_predictions(frame,
                                                     self.predictor(frame))
Example #8
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info
                )
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
                )
            elif "proposals" in predictions:
                predictions = predictions["proposals"].to(self.cpu_device)
                predictions.pred_boxes = predictions.proposal_boxes
                predictions.scores = predictions.objectness_logits
                predictions.pred_classes[:] = -1
                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
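A sketch of driving this method from a webcam and showing the results; demo stands in for an instance of the enclosing class, whose constructor is not part of this example:

import cv2

# demo = ...  # hypothetical instance of the class defining run_on_video()
cap = cv2.VideoCapture(0)  # webcam
for vis_frame in demo.run_on_video(cap):
    cv2.imshow("predictions", vis_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()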
Example #9
def run_on_video(video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(metadata, instancemode)

    def process_predictions(frame, predictions):
        # frame = cv2.flip(frame, 1)  # just for flipping the camera...
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # if "panoptic_seg" in predictions:
        #     panoptic_seg, segments_info = predictions["panoptic_seg"]
        #     vis_frame = video_visualizer.draw_panoptic_seg_predictions(
        #         frame, panoptic_seg.to("cpu"), segments_info
        #     )
        # elif "instances" in predictions:
        predictions = predictions["instances"].to("cpu")
        # predictions.remove('pred_masks')
        vis_frame = video_visualizer.draw_instance_predictions(
            frame, predictions)
        # elif "sem_seg" in predictions:
        #     vis_frame = video_visualizer.draw_sem_seg(
        #         frame, predictions["sem_seg"].argmax(dim=0).to("cpu")
        #     )

        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

        return vis_frame

    frame_gen = _frame_from_video(video)

    # if self.parallel:
    #     buffer_size = self.predictor.default_buffer_size
    #
    #     frame_data = deque()
    #
    #     for cnt, frame in enumerate(frame_gen):
    #         frame_data.append(frame)
    #         self.predictor.put(frame)
    #
    #         if cnt >= buffer_size:
    #             frame = frame_data.popleft()
    #             predictions = self.predictor.get()
    #             yield process_predictions(frame, predictions)
    #
    #     while len(frame_data):
    #         frame = frame_data.popleft()
    #         predictions = self.predictor.get()
    #         yield process_predictions(frame, predictions)
    # else:
    for frame in frame_gen:
        yield process_predictions(frame, predictor(frame))
Example #10
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """

        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # convert from RGB to BGR
            # choose mode
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info
                )
            # instance segmentation mode
            # What is in predictions: a list of dictionaries; each dict contains one
            # key, "instances", which maps to an Instances object with the fields
            # "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints".
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                print(predictions.pred_masks)
                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
                )
Example #11
    def __init__(self,
                 dst,
                 metadata_name,
                 instance_mode=ColorMode.IMAGE,
                 frame_num=True,
                 predictions=True,
                 pose_flows=True):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.frame_num = frame_num
        self.predictions = predictions
        self.pose_flows = pose_flows

        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata,
                                                self.instance_mode)

        super().__init__()
Example #12
    def __init__(self, cfg, parallel, instance_mode=ColorMode.IMAGE):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")

        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel

        if self.parallel == 1:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)
        self.video_visualizer = VideoVisualizer(self.metadata,
                                                self.instance_mode)
Example #13
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "instances" in predictions:
                predictions['instances'] = predictions['instances'].to('cpu')
                indices = predictions['instances'].pred_classes == 1
                predictions['instances'] = predictions['instances'][indices]
                if (len(predictions['instances']) == 0):
                    vis_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                else:
                    vis_frame = video_visualizer.draw_instance_predictions(
                        frame, predictions['instances'])
                    # Converts Matplotlib RGB format to OpenCV BGR format
                    vis_frame = cv2.cvtColor(vis_frame.get_image(),
                                             cv2.COLOR_RGB2BGR)
            else:
                # no instance predictions in the output: return the frame unchanged (converted back to BGR)
                vis_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
Example #14
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            print(frame, type(frame))
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                # panoptic_seg, segments_info = predictions["panoptic_seg"]
                # vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                #     frame, panoptic_seg.to(self.cpu_device), segments_info
                # )
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                success, ocean_frame = ocean.read()
                res = mask(panoptic_seg, segments_info, frame, ocean_frame, 21, 0.8)
                res = mask(panoptic_seg, segments_info, res, sky, 40, 0.8)
                # res = mask(panoptic_seg, segments_info, res, graffiti, 50, 0.5)
                img = cv2.cvtColor(res, cv2.COLOR_BGRA2BGR)
                return np.array(img)
            # Converts Matplotlib RGB format to OpenCV BGR format
        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
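The mask helper used above is not part of this example. A plausible sketch, assuming its arguments are (panoptic_seg, segments_info, base_frame, overlay_frame, category_id, alpha) and that it alpha-blends the overlay into every pixel whose panoptic segment has the given category id:

import cv2
import numpy as np

def mask(panoptic_seg, segments_info, base_frame, overlay_frame, category_id, alpha):
    # Hypothetical reconstruction inferred from the call sites above.
    seg = panoptic_seg.to("cpu").numpy()
    overlay = cv2.resize(overlay_frame, (base_frame.shape[1], base_frame.shape[0]))
    result = base_frame.copy()
    for info in segments_info:
        if info["category_id"] == category_id:
            region = seg == info["id"]
            result[region] = (alpha * overlay[region] +
                              (1 - alpha) * base_frame[region]).astype(np.uint8)
    return result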
Example #15
    def run_on_video(self, video, predictions, effect_type, current_frame):
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(cnt, frame, predictions):

            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            if "panoptic_seg" in predictions:
                # vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                #     frame, panoptic_seg.to(self.cpu_device), segments_info
                # )
                vis_frame = 0
            elif "instances" in predictions:
                print("video instances")
                vis_frame = video_visualizer.draw_instance_predictions(
                    cnt, frame, predictions)
            elif "sem_seg" in predictions:
                print("sem_seg")
                vis_frame = None

            vis_frame = video_visualizer.draw_instance_predictions(
                frame, predictions, effect_type)
            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()
            for cnt, frame in enumerate(frame_gen):
                # print(cnt,1)
                frame_data.append(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                yield process_predictions(frame, predictions)
        else:
            for cnt, frame in enumerate(frame_gen):
                # print("non-parallel prediction",cnt)
                # if predictions[cnt]['current_frame'] == cnt:
                if current_frame <= cnt and cnt < current_frame + 30:
                    yield process_predictions(cnt, frame, predictions[cnt])
Example #16
    def run_on_video(self, video):

        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # convert back to BGR for OpenCV
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)

        if self.parallel:  # multi-GPU rendering is enabled; not yet supported here
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
Example #17
    def run_on_video(self, video):
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(cnt, frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                return cnt, panoptic_seg, segments_info
            elif "instances" in predictions:
                print("instances")
                predictions = predictions["instances"].to(self.cpu_device)
                return cnt, predictions
            elif "sem_seg" in predictions:
                print("sem_seg")
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
                return cnt, vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()
            for cnt, frame in enumerate(frame_gen):
                # print(cnt,1)
                frame_data.append((cnt, frame))
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame_cnt, frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame_cnt, frame, predictions)

            while len(frame_data):
                frame_cnt, frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame_cnt, frame, predictions)
        else:
            for cnt, frame in enumerate(frame_gen):
                # print("non-parallel prediction",cnt)
                yield process_predictions(cnt, frame, self.predictor(frame))
Example #18
    def run_on_video(self, video, dictionary):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions, dictionary):
            resulte = 0
            max_inform_keypoint = None
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)

            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                # determine the box
                max_inform_keypoint = self.search_max_box_information(
                    predictions)
                if max_inform_keypoint is not None:
                    # draw the box
                    bbox = max_inform_keypoint[0]
                    frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                          (int(bbox[2]), int(bbox[3])),
                                          (0, 255, 0), 2)
                    # draw the keypoints
                    keypoint_list = max_inform_keypoint[1]
                    for i, keypoint in enumerate(keypoint_list):
                        circle_coord = (int(keypoint[0]), int(keypoint[1]))
                        frame = cv2.putText(frame, str(i), circle_coord,
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                            (255, 0, 0), 2)
                    # draw the angles
                    frame = self.write(frame, dictionary["angle"],
                                       keypoint_list)
                    # draw the distances
                    frame = self.write_distance(frame, dictionary["distance"],
                                                keypoint_list)
                    # evaluate the sit-up
                    resulte = self.poll_situp(keypoint_list, dictionary)
                    # save the result
                    # save_json = self.save_resulte(keypoint_list,dictionary)

                    vis_frame = frame[..., ::-1]
                else:
                    vis_frame = frame[..., ::-1]

                # vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

            return {
                "vis_frame": vis_frame,
                "resulte": resulte,
                "max_inform_keypoint": max_inform_keypoint
            }

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions, dictionary)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions, dictionary)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame),
                                          dictionary)
Example #19
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer_object = VideoVisualizer(self.metadata_object,
                                                  self.instance_mode)
        video_visualizer_keypoint = VideoVisualizer(self.metadata_keypoint,
                                                    self.instance_mode)

        def process_predictions(frame, predictions_object,
                                predictions_keypoint):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            blank_image = np.zeros((frame.shape[0], frame.shape[1], 3),
                                   np.uint8)

            if "instances" in predictions_object:
                predictions_object = predictions_object["instances"].to(
                    self.cpu_device)
                boxes_area = predictions_object.get('pred_boxes').area()

                if boxes_area.nelement() != 0:
                    max_val, max_idx = torch.max(boxes_area, dim=0)

                    pred_boxes_object = predictions_object.get('pred_boxes')[
                        max_idx.item()]
                    scores_object = predictions_object.get('scores')[
                        max_idx.item()]
                    pred_classes_object = predictions_object.get(
                        'pred_classes')[max_idx.item()]

                    draw_instance_object = Instances([1280, 720])

                    draw_instance_object.set('pred_boxes', pred_boxes_object)
                    draw_instance_object.set(
                        'scores', torch.unsqueeze(scores_object, dim=0))
                    draw_instance_object.set(
                        'pred_classes',
                        torch.unsqueeze(pred_classes_object, dim=0))

                    self.data_json['object_detection'][
                        'pred_boxes'] = predictions_object.get('pred_boxes')[
                            max_idx.item()].tensor.numpy().tolist()
                    self.data_json['object_detection'][
                        'scores'] = predictions_object.get('scores')[
                            max_idx.item()].numpy().tolist()
                    vis_frame = video_visualizer_object.draw_instance_predictions(
                        blank_image, draw_instance_object)
                else:
                    self.data_json['object_detection']['pred_boxes'] = []
                    self.data_json['object_detection']['scores'] = []
                    vis_frame = video_visualizer_object.draw_instance_predictions(
                        blank_image, predictions_object)

            if "instances" in predictions_keypoint:
                predictions_keypoint = predictions_keypoint["instances"].to(
                    self.cpu_device)
                boxes_area = predictions_keypoint.get('pred_boxes').area()

                if boxes_area.nelement() != 0:
                    max_val, max_idx = torch.max(boxes_area, dim=0)

                    pred_boxes_keypoint = predictions_keypoint.get(
                        'pred_boxes')[max_idx.item()]
                    scores_keypoint = predictions_keypoint.get('scores')[
                        max_idx.item()]
                    pred_classes_keypoint = predictions_keypoint.get(
                        'pred_classes')[max_idx.item()]
                    pred_keypoints_keypoint = predictions_keypoint.get(
                        'pred_keypoints')[max_idx.item()]

                    draw_instance_keypoint = Instances([1280, 720])
                    draw_instance_keypoint.set('pred_boxes',
                                               pred_boxes_keypoint)
                    draw_instance_keypoint.set(
                        'scores', torch.unsqueeze(scores_keypoint, dim=0))
                    draw_instance_keypoint.set(
                        'pred_classes',
                        torch.unsqueeze(pred_classes_keypoint, dim=0))
                    draw_instance_keypoint.set(
                        'pred_keypoints',
                        torch.unsqueeze(pred_keypoints_keypoint, dim=0))

                    self.data_json['keypoint_detection'][
                        'pred_boxes'] = predictions_keypoint.get('pred_boxes')[
                            max_idx.item()].tensor.numpy().tolist()
                    self.data_json['keypoint_detection'][
                        'scores'] = predictions_keypoint.get('scores')[
                            max_idx.item()].numpy().tolist()
                    self.data_json['keypoint_detection'][
                        'pred_keypoints'] = predictions_keypoint.get(
                            'pred_keypoints')[max_idx.item()].numpy().tolist()
                    vis_frame = video_visualizer_keypoint.draw_instance_predictions(
                        vis_frame.get_image(), draw_instance_keypoint)
                else:
                    self.data_json['keypoint_detection']['pred_boxes'] = []
                    self.data_json['keypoint_detection']['scores'] = []
                    self.data_json['keypoint_detection']['pred_keypoints'] = []
                    vis_frame = video_visualizer_keypoint.draw_instance_predictions(
                        vis_frame.get_image(), predictions_keypoint)

            # head pose estimation
            predictions, bounding_box, face_keypoints, w, face_area = head_pose_estimation(
                frame, self.mtcnn, self.head_pose_module, self.transformations,
                self.softmax, self.idx_tensor)
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

            if len(face_area) != 0:
                max_val, max_idx = torch.max(torch.Tensor(face_area), dim=0)

                self.data_json['head_pose_estimation'][
                    'predictions'] = predictions[max_idx.item()]
                self.data_json['head_pose_estimation'][
                    'pred_boxes'] = bounding_box[max_idx.item()]

                # Converts Matplotlib RGB format to OpenCV BGR format

                plot_pose_cube(vis_frame, predictions[max_idx.item()][0], predictions[max_idx.item()][1], predictions[max_idx.item()][2], \
                                tdx = (face_keypoints[max_idx.item()][0] + face_keypoints[max_idx.item()][2]) / 2, \
                                tdy= (face_keypoints[max_idx.item()][1] + face_keypoints[max_idx.item()][3]) / 2, \
                                size = w[max_idx.item()])
                # draw_axis(vis_frame, predictions[i][0], predictions[i][1], predictions[i][2], \
                #                 tdx = (face_keypoints[i][0] + face_keypoints[i][2]) / 2, \
                #                 tdy= (face_keypoints[i][1] + face_keypoints[i][3]) / 2, \
                #                 size = w[i])

            data_json = self.data_json
            self.data_json['frame'] = self.frame_count
            self.frame_count += 1
            return vis_frame, data_json

        frame_gen = self._frame_from_video(video)

        for frame in frame_gen:

            yield process_predictions(frame, self.predictor_object(frame),
                                      self.predictor_keypoint(frame))
Example #20
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""
    def __init__(self,
                 dst,
                 metadata_name,
                 instance_mode=ColorMode.IMAGE,
                 frame_num=True,
                 predictions=True,
                 pose_flows=True):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.frame_num = frame_num
        self.predictions = predictions
        self.pose_flows = pose_flows

        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata,
                                                self.instance_mode)

        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image

        if self.frame_num:
            self.annotate_frame_num(data)
        if self.predictions:
            self.annotate_predictions(data)
        if self.pose_flows:
            self.annotate_pose_flows(data)

        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]

        put_text(dst_image,
                 f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return

        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]

        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))

        # Converts RGB format to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image

    def annotate_pose_flows(self, data):
        if "pose_flows" not in data:
            return

        predictions = data["predictions"]
        instances = predictions["instances"]
        keypoints = instances.pred_keypoints.cpu().numpy()
        l_pairs = [
            (0, 1),
            (0, 2),
            (1, 3),
            (2, 4),  # Head
            (5, 6),
            (5, 7),
            (7, 9),
            (6, 8),
            (8, 10),
            (6, 12),
            (5, 11),
            (11, 12),  # Body
            (11, 13),
            (12, 14),
            (13, 15),
            (14, 16)
        ]

        dst_image = data[self.dst]
        height, width = dst_image.shape[:2]

        pose_flows = data["pose_flows"]
        pose_colors = list(colors.items())
        pose_colors_len = len(pose_colors)

        for idx, pose_flow in enumerate(pose_flows):
            pid = pose_flow["pid"]
            pose_color_idx = ((pid * 10) % pose_colors_len +
                              pose_colors_len) % pose_colors_len
            pose_color_bgr = pose_colors[pose_color_idx][1].to_bgr()
            (start_x, start_y, end_x, end_y) = pose_flow["box"].astype("int")
            cv2.rectangle(dst_image, (start_x, start_y), (end_x, end_y),
                          pose_color_bgr, 2, cv2.LINE_AA)
            put_text(dst_image,
                     f"{pid:d}", (start_x, start_y),
                     color=pose_color_bgr,
                     bg_color=colors.get("black").to_bgr(),
                     org_pos="tl")

            instance_keypoints = keypoints[idx]
            l_points = {}
            p_scores = {}
            # Draw keypoints
            for n in range(instance_keypoints.shape[0]):
                score = instance_keypoints[n, 2]
                if score <= 0.05:
                    continue
                cor_x = int(np.clip(instance_keypoints[n, 0], 0, width))
                cor_y = int(np.clip(instance_keypoints[n, 1], 0, height))
                l_points[n] = (cor_x, cor_y)
                p_scores[n] = score
                cv2.circle(dst_image, (cor_x, cor_y), 2, pose_color_bgr, -1)
            # Draw limbs
            for i, (start_p, end_p) in enumerate(l_pairs):
                if start_p in l_points and end_p in l_points:
                    start_xy = l_points[start_p]
                    end_xy = l_points[end_p]
                    start_score = p_scores[start_p]
                    end_score = p_scores[end_p]
                    cv2.line(dst_image, start_xy, end_xy, pose_color_bgr,
                             int(2 * (start_score + end_score) + 1))
Example #21
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            k = 0  # set to 1 only when instance predictions and their colors are available
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                k = 0
                try:
                    vis_frame, colors = video_visualizer.draw_instance_predictions(
                        frame, predictions)
                    k = 1
                except:
                    vis_frame = video_visualizer.draw_instance_predictions(
                        frame, predictions)
                if k == 1:
                    boxes = predictions.pred_boxes.tensor.numpy(
                    ) if predictions.has("pred_boxes") else None
                    classes = predictions.pred_classes.numpy(
                    ) if predictions.has("pred_classes") else None
                    person_list = []
                    person_track = []
                    for box, class_label, color in zip(boxes, classes, colors):
                        if int(class_label) == 0:
                            pixel_width = box[2] - box[0]
                            # print(box,'=========================>')
                            # print(pixel_width,'============================>')
                            box = np.asarray([[box[0], box[1]],
                                              [box[2], box[3]]])
                            # pixel_per_metric = 15.45
                            # original_width = pixel_width * pixel_per_metric
                            # distance_z = (original_width*3)/pixel_width  #D’ = (W x F) / P
                            distance_z = pixel_width
                            cX = np.average(box[:, 0])
                            cY = np.average(box[:, 1])
                            # cY = cY + distance_z
                            person_list.append([cX, cY, distance_z])
                            person_track.append(color)
                    # print('<=============================>',person_list,'<=============================>')
            #find the center of the box by top-left x and bottom-right x / 2 and same for y

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            # D = dist.cdist(person_list,person_list,'euclidean')
            # print(person_list,D)
            # def midpoint(ptA, ptB):
            #     return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5)
            self.time_count += 1

            vis_frame = frame
            if k == 1:
                person = sorted(zip(person_list, person_track))

                hh, ww, c = (540, 960, 3)
                # hh,ww,c = vis_frame.shape
                # aspect_ratio = 960/540

                # width_scale = (530/960)
                # height_scale = (600/540)
                # result_width = int(vis_frame.shape[1]*width_scale)
                # result_height= int(vis_frame.shape[0]*height_scale)
                # result = np.zeros((result_width,result_height, 3))
                result = np.zeros((530, 600, 3))
                # x_scale = (result_width/vis_frame.shape[1])
                # y_scale = (result_height/vis_frame.shape[0])
                x_scale = (530 / vis_frame.shape[1])
                y_scale = (600 / vis_frame.shape[0])
                ht, wd, cc = result.shape
                # print(ww,wd)
                xx = (ww - wd) // 2
                yy = (hh - ht) // 2
                # print(xx, yy,'.................')
                color = (245, 245, 245)
                layer1 = np.full((hh, ww, cc), color, dtype=np.uint8)

                green_list = []
                yellow_list = []
                red_list = []
                for box_i, track_i in person:
                    for box_j, track_j in person:
                        objectid = str(track_i) + str(track_j)
                        objectid = objectid.replace('[', '').replace(
                            ']', '').replace('.', '').replace(' ', '')
                        if self.time_count % 10:
                            self.time_count = 0
                            for indexs, l in enumerate(self.all_track_id):
                                if l != objectid:
                                    self.disappear(l)
                                    if self.maximum_wait[l] >= 10000:
                                        self.detrack(l, indexs)

                        if box_i != box_j:
                            xA, yA, zA = box_i
                            xB, yB, zB = box_j
                            z_check = abs(zA - zB)
                            D = dist.euclidean((xA, yA), (xB, yB))
                            division_index_A = yA / y_division
                            division_index_B = yB / y_division
                            A_div = division[int(division_index_A)]
                            B_div = division[int(division_index_B)]
                            yA = abs(yA + A_div)
                            yB = abs(yB + B_div)
                            xA = abs(xA + A_div)
                            xB = abs(xB + B_div)

                            if abs(division_index_A - division_index_B) < 1.0:
                                Main_threshold = min(A_div, B_div)
                            else:
                                Main_threshold = 0.4
                            # cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),
                            #             (255,0,0), 2)
                            # def midpoint(ptA, ptB):
                            #     return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5)
                            # (mX, mY) = midpoint((xA, yA), (xB, yB))
                            # cv2.putText(vis_frame, "{:.1f}in".format(D), (int(mX), int(mY - 10)),
                            #             cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,0,0), 2)
                            # print('.........  ...')
                            if D < Main_threshold:
                                if objectid in self.objects:
                                    self.update(id=objectid)
                                else:
                                    self.all_track_id.append(objectid)
                                    self.create_track(id=objectid)
                                if self.objects[objectid] <= 90:
                                    xA, yA, zA = box_i
                                    xB, yB, ZB = box_j
                                    # cv2.circle(vis_frame, (int(xA), int(yA)), 5, (255,0,0), -1)
                                    # cv2.circle(vis_frame, (int(xB), int(yB)), 5, (255,0,0), -1)
                                    # overlay = vis_frame.copy()
                                    cv2.circle(vis_frame, (int(xA), int(yA)),
                                               3, (0, 255, 255), -1)
                                    cv2.circle(vis_frame, (int(xB), int(yB)),
                                               3, (0, 255, 255), -1)
                                    cv2.line(vis_frame, (int(xA), int(yA)),
                                             (int(xB), int(yB)), (255, 255, 0),
                                             2)
                                    if box_i not in red_list and box_i not in yellow_list:
                                        yellow_list.append(box_i)
                                        new_box_i_x = int(
                                            round((box_i[0]) * x_scale))
                                        new_box_i_y = int(
                                            round((box_i[1]) * y_scale))
                                        new_box_j_x = int(
                                            round((box_j[0]) * x_scale))
                                        new_box_j_y = int(
                                            round((box_j[1]) * y_scale))
                                        cv2.line(result, (int(new_box_i_x),
                                                          int(new_box_i_y)),
                                                 (int(new_box_j_x),
                                                  int(new_box_j_y)),
                                                 (255, 255, 0), 2)

                                    # cv2.addWeighted(overlay, 0.1, vis_frame, 1 - 0.,0, vis_frame)

                                else:
                                    xA, yA, zA = box_i
                                    xB, yB, zB = box_j
                                    # overlay = vis_frame.copy()
                                    cv2.circle(vis_frame, (int(xA), int(yA)),
                                               3, (0, 0, 255), -1)
                                    cv2.circle(vis_frame, (int(xB), int(yB)),
                                               3, (0, 0, 255), -1)
                                    cv2.line(vis_frame, (int(xA), int(yA)),
                                             (int(xB), int(yB)), (255, 0, 0),
                                             2)
                                    if box_i not in red_list:
                                        red_list.append(box_i)
                                        new_box_i_x = int(
                                            round((box_i[0]) * x_scale))
                                        new_box_i_y = int(
                                            round((box_i[1]) * y_scale))
                                        new_box_j_x = int(
                                            round((box_j[0]) * x_scale))
                                        new_box_j_y = int(
                                            round((box_j[1]) * y_scale))
                                        cv2.line(result, (int(new_box_i_x),
                                                          int(new_box_i_y)),
                                                 (int(new_box_j_x),
                                                  int(new_box_j_y)),
                                                 (0, 0, 255), 2)

                            else:
                                if box_i not in red_list and box_i not in yellow_list and box_i not in green_list:
                                    green_list.append(box_i)
                                if box_j not in red_list and box_j not in yellow_list and box_j not in green_list:
                                    green_list.append(box_j)
                for box_check, track_check in person:
                    if box_check in red_list:
                        new_box_i_x = int(round((box_check[0]) * x_scale))
                        new_box_i_y = int(round((box_check[1]) * y_scale))
                        # track_i = track_i * 255.0
                        cv2.circle(result, (new_box_i_x, new_box_i_y), 5,
                                   (0, 0, 255), 5)
                    elif box_check in yellow_list:
                        new_box_i_x = int(round((box_check[0]) * x_scale))
                        new_box_i_y = int(round((box_check[1]) * y_scale))
                        # track_i = track_i * 255.0
                        cv2.circle(result, (new_box_i_x, new_box_i_y), 5,
                                   (0, 255, 255), 5)
                    elif box_check in green_list:
                        new_box_i_x = int(round((box_check[0]) * x_scale))
                        new_box_i_y = int(round((box_check[1]) * y_scale))
                        # track_i = track_i * 255.0
                        cv2.circle(result, (new_box_i_x, new_box_i_y), 5,
                                   (0, 128, 0), 5)
                cv2.putText(result, "{:.1f}".format(len(red_list)),
                            (int(20), int(40)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 0, 255), 5)
                cv2.putText(result, "{:.1f}".format(len(yellow_list)),
                            (int(20), int(70)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 255), 5)
                cv2.putText(result, "{:.1f}".format(len(green_list)),
                            (int(20), int(100)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 0), 5)
                # for i in range(1,16):
                #     xA = 1
                #     yA = y_division * i
                #     xB = 700
                #     yB = yA

                #     cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),(255,0,0), 2)

                # print(vis_frame.shape,layer1.shape)
                # cv2.imwrite('imagetest.jpg',layer1)
                vis_frame = cv2.cvtColor(vis_frame, cv2.COLOR_RGB2BGR)
                layer1[yy:yy + ht, xx:xx + wd] = result
                # vis_frame = cv2.resize(vis_frame,(960,540),interpolation = cv2.INTER_CUBIC)
                vis_frame = np.concatenate((vis_frame, layer1), axis=1)

            else:
                vis_frame = cv2.resize(vis_frame, (960, 540),
                                       interpolation=cv2.INTER_CUBIC)
                hh, ww, c = vis_frame.shape
                result = np.zeros((530, 600, 3), dtype=np.uint8)
                # x_scale = result_width / frame_width, y_scale = result_height / frame_height
                x_scale = 600 / vis_frame.shape[1]
                y_scale = 530 / vis_frame.shape[0]
                ht, wd, cc = result.shape
                xx = (ww - wd) // 2
                yy = (hh - ht) // 2
                color = (245, 245, 245)
                layer1 = np.full((hh, ww, cc), color, dtype=np.uint8)
                layer1[yy:yy + ht, xx:xx + wd] = result
                vis_frame = np.concatenate((vis_frame, layer1), axis=1)

                # cv2.addWeighted(overlay, 0.1, vis_frame, 1 - 0.1,0, vis_frame)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
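
For reference, the mapping used above (placing each detection on the side panel) reduces to scaling box centres from the camera frame into the fixed-size panel. A minimal standalone sketch, assuming a list of (x, y, z) centres and a frame shape; the names here are illustrative, not the author's:

import cv2
import numpy as np


def draw_panel(centres, frame_shape, panel_hw=(530, 600)):
    """Scale (x, y) centres from frame coordinates onto a blank top-down panel."""
    panel_h, panel_w = panel_hw
    frame_h, frame_w = frame_shape[:2]
    x_scale = panel_w / frame_w
    y_scale = panel_h / frame_h
    panel = np.zeros((panel_h, panel_w, 3), dtype=np.uint8)
    for x, y, _z in centres:
        px = int(round(x * x_scale))
        py = int(round(y * y_scale))
        cv2.circle(panel, (px, py), 5, (0, 128, 0), 5)
    return panel
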
Пример #22
0
    # We can use `Visualizer` to draw the predictions on the image.
    #v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.0)
    #v = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    #cv2_imshow(v.get_image()[:, :, ::-1])
    #cv2.imwrite('output.png',v.get_image()[:, :, ::-1])

    if args.images_input_dir:
        all_detection_outputs = {}

        jpgs = sorted(glob.glob(os.path.join(args.images_input_dir, "*.jpg")))
        num_frames = len(jpgs)

        predictor = DefaultPredictor(cfg)
        video_visualiser = VideoVisualizer(
            MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))

        os.makedirs(os.path.join(args.output, 'detection'), exist_ok=True)

        predictions_save_path = os.path.join(args.output,
                                             "all_detection_outputs.pkl")
        assert not os.path.isfile(predictions_save_path), predictions_save_path

        for jpg in tqdm.tqdm(jpgs):
            image_basename = os.path.basename(jpg)
            frame_num = int(os.path.splitext(image_basename)[0])

            frame = cv2.imread(jpg)

            visualised_jpg_path = os.path.join(args.output, 'detection',
                                               image_basename)
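
The snippet stops here; a plausible continuation of the per-image loop (a sketch only, assuming `pickle` is imported and that detections are stored per frame number) would be:

            # Sketch of a possible continuation of the loop body (not the original code):
            outputs = predictor(frame)
            instances = outputs["instances"].to("cpu")
            all_detection_outputs[frame_num] = instances

            # Drawing with the shared VideoVisualizer keeps colours consistent
            # across frames; imwrite expects BGR, hence the channel flips.
            vis = video_visualiser.draw_instance_predictions(frame[:, :, ::-1],
                                                             instances)
            cv2.imwrite(visualised_jpg_path, vis.get_image()[:, :, ::-1])

        with open(predictions_save_path, "wb") as f:
            pickle.dump(all_detection_outputs, f)
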
Пример #23
0
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg)
    output_file = None
    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            files = sorted(os.listdir(args.input[0]))
            args.input = [os.path.join(args.input[0], x) for x in files]
            assert args.input, "The input path(s) was not found"
        visualizer = VideoVisualizer(MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"),
                                     instance_mode=ColorMode.IMAGE)
        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(
                img, visualizer=visualizer)
            if 'instances' in predictions:
                logger.info("{}: detected {} instances in {:.2f}s".format(
                    path, len(predictions["instances"]),
                    time.time() - start_time))
            else:
                logger.info("{}: detected {} instances in {:.2f}s".format(
                    path, len(predictions["proposals"]),
                    time.time() - start_time))
class VisualizationDemo(object):
    def __init__(self, cfg, parallel, instance_mode=ColorMode.IMAGE):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")

        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel

        if self.parallel == 1:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)
        self.video_visualizer = VideoVisualizer(self.metadata,
                                                self.instance_mode)

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            The drawn instance predictions for each frame, or None when no
            instances were predicted.
        """
        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            instances = None
            if "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                instances = self.video_visualizer.draw_instance_bbox(
                    predictions)

            return instances

        frame_gen = self._frame_from_video(video)

        if self.parallel == 1:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
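
A typical way to consume such a `run_on_video` generator and write the frames to disk (a sketch, assuming the yielded object exposes `get_image()` like detectron2's VisImage, and using hypothetical input/output paths):

import cv2

video = cv2.VideoCapture("input.mp4")      # hypothetical input path
demo = VisualizationDemo(cfg, parallel=0)  # cfg prepared elsewhere

writer = None
for vis in demo.run_on_video(video):
    if vis is None:                        # no instances in this frame
        continue
    frame_bgr = cv2.cvtColor(vis.get_image(), cv2.COLOR_RGB2BGR)
    if writer is None:
        h, w = frame_bgr.shape[:2]
        writer = cv2.VideoWriter("output.mp4",
                                 cv2.VideoWriter_fourcc(*"mp4v"), 30.0, (w, h))
    writer.write(frame_bgr)

video.release()
if writer is not None:
    writer.release()
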
Пример #25
0
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions, id_frame):
            # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # vis_frame = VisImage(frame)
            vis_frame = frame
            # if "panoptic_seg" in predictions:
            #     panoptic_seg, segments_info = predictions["panoptic_seg"]
            #     vis_frame = video_visualizer.draw_panoptic_seg_predictions(
            #         frame, panoptic_seg.to(self.cpu_device), segments_info
            #     )
            # elif "instances" in predictions:
            #     predictions = predictions["instances"].to(self.cpu_device)
            #     vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
            if "sem_seg" in predictions:
                sem_seg = predictions["sem_seg"].argmax(dim=0).to(
                    self.cpu_device).cpu().numpy()
                # print(sem_seg)
                # print(sem_seg.shape)
                # print(labels)
                # print(areas)
                # Drawing: render the segmentation mask
                mask_color = colormap[sem_seg].astype(dtype=np.uint8)
                # print(mask_color)
                # cv2.imwrite('output/' + id_frame + "-mask.jpg", mask_color)
                # print(type(mask_color))
                # print(mask_color.shape)
                # print(frame.shape)
                # image = np.concatenate((frame, mask_color))  # debug: stacked frame/mask view
                # cv2.imwrite('output/' + id_frame + "-concat.jpg", image)
                image = cv2.addWeighted(frame, 0.3, mask_color, 0.7, 0)
                # cv2.imwrite('output/' + id_frame + "-add.jpg",image)
                # vis_frame=VisImage(image)
                vis_frame = image
                # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
                # vis_frame = video_visualizer.draw_sem_seg(
                #     frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
                # )
                # print(type(vis_frame))
                # Converts Matplotlib RGB format to OpenCV BGR format

                palette = (2**11 - 1, 2**15 - 1, 2**20 - 1)
                # dects = self.tracks[str(cnt+1)]
                # print(id_frame)

                # Statistics: update the segmentation counts
                labels, areas = np.unique(sem_seg, return_counts=True)
                if int(id_frame) % 300 == 0:
                    for i in range(len(labels)):
                        all_cnt[str(labels[i])] += 1  # overall count
                        seg_cnt[str(labels[i])] += 1  # count from segmentation
                        # print(i)
                        # print(i,labels[i],areas[i])
                # Drawing: draw the tracking boxes
                cateid = 0
                if (self.tracks.get(id_frame)):
                    # id_frame = str(int(id_frame))
                    dects = self.tracks[id_frame]
                    for dect in dects:
                        x1 = int(dect['bbox'][0])
                        x2 = int(dect['bbox'][0]) + int(dect['bbox'][2])
                        y1 = int(dect['bbox'][1])
                        y2 = int(dect['bbox'][1]) + int(dect['bbox'][3])
                        category = categories[dect['category_id']]
                        cateid = dect['cateid']
                        catecnt = all_cnt[dect['category_id']]
                        insid = int(dect['insid'])
                        track_cnt[dect['category_id']] = cateid  # count from tracking
                        # print(track_cnt[dect['category_id']],cateid,insid)
                        color = [
                            int((p * (insid**2 - insid + 1)) % 255)
                            for p in palette
                        ]
                        color = tuple(color)
                        vis_frame = cv2.rectangle(vis_frame, (x1, y1),
                                                  (x2, y2), color, 3)
                        label = '{:s}:{:d}/{:d} '.format(
                            category, cateid, catecnt)
                        # print(label)
                        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN,
                                                 2, 2)[0]
                        vis_frame = cv2.rectangle(
                            vis_frame, (x1, y1),
                            (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color,
                            -1)
                        cv2.putText(vis_frame, label, (x1, y1 + t_size[1] + 4),
                                    cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255],
                                    2)
                # Drawing: write the statistics overlay
                for i in range(30):
                    # print(categories[str(i)], ins_id_category[str(i)])
                    if (i < 19):
                        color = colormap.tolist()[i]
                        color = tuple(color)
                        # color = tuple([int(color[0]*0.7),int(color[1]*0.7),int(color[2]*0.7)])
                    else:
                        color = colormap.tolist()[19]
                        color = tuple(color)
                        # color = tuple([int(color[0]*0.7),int(color[1]*0.7),int(color[2]*0.7)])
                    label = '{:s}:{:d}/{:d} '.format(
                        categories[str(i)],
                        seg_cnt[str(i)] + track_cnt[str(i)], all_cnt[str(i)])
                    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2,
                                             2)[0]
                    vis_frame = cv2.rectangle(vis_frame,
                                              (0, (t_size[1] + 10) * i),
                                              (t_size[0] + 1,
                                               (t_size[1] + 10) * (i + 1)),
                                              color, -1)  # filled rectangle between the two corners
                    cv2.putText(vis_frame, label,
                                (0, (t_size[1] + 10) * (i + 1) - 5),
                                cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

                # vis_frame = cv2.cvtColor(vis_frame, cv2.COLOR_RGB2BGR)
                # cv2.imwrite('output/' + id_frame + '-sd.jpg', vis_frame)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    # dects=self.tracks[str(cnt+1-buffer_size)]
                    yield process_predictions(frame, predictions,
                                              str(cnt + 1 - buffer_size))

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                # dects=self.tracks[-len(frame_data):][0]
                yield process_predictions(frame, predictions,
                                          str(-len(frame_data)))
        else:
            # for frame in frame_gen:
            predictions = []
            for cnt, frame in enumerate(frame_gen):
                # dects = self.tracks[str(cnt+1)]
                # print(cnt)
                if cnt % 1 == 0:  # runs every frame; increase the modulus to skip frames
                    # print(cnt)
                    predictions = self.predictor(frame)
                yield process_predictions(frame, predictions, str(cnt))
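
The snippet above depends on several globals defined elsewhere (`colormap`, `categories`, `all_cnt`, `seg_cnt`, `track_cnt`). A minimal sketch of how they might be initialised, assuming 20 colour entries, 30 category ids, and the string-keyed counters the code expects:

import numpy as np

# Hypothetical setup for the globals used in the snippet above.
rng = np.random.RandomState(0)
colormap = rng.randint(0, 255, size=(20, 3), dtype=np.uint8)  # one colour per class id

categories = {str(i): "class_{}".format(i) for i in range(30)}  # placeholder names
all_cnt = {str(i): 0 for i in range(30)}    # overall count per category
seg_cnt = {str(i): 0 for i in range(30)}    # count contributed by segmentation
track_cnt = {str(i): 0 for i in range(30)}  # count contributed by the tracker
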
Пример #26
0
                fps=float(frames_per_second),
                frameSize=(width, height),
                isColor=True,
            )"""

while cap.isOpened():

    ret, frame = cap.read()
    if not ret:
        break
    frame = cv2.resize(frame, (224, 224))
    print(fps)
    print(num_frames)

    try:
        outputs = predictor(frame)
        #v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
        v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                            instance_mode=ColorMode.IMAGE_BW)
        v = v.draw_instance_predictions(frame, outputs["instances"].to('cpu'))
        print(outputs["instances"].pred_classes)
        omt = str(outputs["instances"].pred_classes)
        outpclass = omt[8:9]
        print(outpclass)
        """while (cap.isOpened()): #outpclass is printing ang giving 0 if 0 comes then action this loop
          if outpclass == '0':
              #unlock(8) make ur own function to test 
              time.sleep(10) #Lock will remains open for 10 seconds. make this run in loop
              #lock(8)
              #GPIO.cleanup(8)"""

        #out.write(v.get_image())
    #cv2_imshow("Moda", v.get_image())
    except:
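
Two notes on the loop above: the `VideoVisualizer` is recreated on every frame, which loses the colour/ID consistency it is designed for, so it is better constructed once before the loop; and slicing `str(pred_classes)` to recover the class id is fragile. A sketch of the more direct way to read the first predicted class, assuming the same `outputs` dict:

# Reading the class id from the tensor is more robust than slicing its repr string:
instances = outputs["instances"].to("cpu")
if len(instances) > 0:
    first_class = int(instances.pred_classes[0].item())
    if first_class == 0:
        pass  # e.g. trigger the unlock routine sketched in the commented block above
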
Пример #27
0
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            Instances: per-frame predictions, ndarray: BGR visualization of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            # See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
            #   note tensor ==> pytorch.tensor
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # if "panoptic_seg" in predictions:
            #     panoptic_seg, segments_info = predictions["panoptic_seg"]
            #     retval = panoptic_seg # TODO
            #     vis_frame = video_visualizer.draw_panoptic_seg_predictions(
            #         frame, panoptic_seg.to(self.cpu_device), segments_info
            #     )
            # elif "instances" in predictions:
            if "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
                # TODO: grab all these
                # classes = predictions.to(self.cpu_device).pred_classes.numpy()
                # scores = predictions.scores
                # retval = predictions.to(self.cpu_device).pred_boxes.tensor.numpy()
                retval = predictions
            # elif "sem_seg" in predictions:
            #     vis_frame = video_visualizer.draw_sem_seg(
            #         frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
            #     )
            #     retval = predictions["sem_seg"].argmax(dim=0) # TODO

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return retval, vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
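
The generator above yields `(predictions, vis_frame)` pairs, so downstream code can post-process the raw detections as well as display the drawn frame. A small consumption sketch, assuming `demo` is an instance of this class and `video` is a `cv2.VideoCapture`:

import cv2

for predictions, vis_frame in demo.run_on_video(video):
    boxes = predictions.pred_boxes.tensor.numpy()   # (N, 4) XYXY boxes
    scores = predictions.scores.numpy()             # (N,) confidence scores
    classes = predictions.pred_classes.numpy()      # (N,) class ids
    keep = scores > 0.5                             # hypothetical threshold
    print("{} detections above 0.5".format(keep.sum()))
    cv2.imshow("vis", vis_frame)
    if cv2.waitKey(1) == 27:                        # Esc to stop
        break
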
Пример #28
0
    siammask = Custom(anchors=cfg_siammask['anchors'])
    if args.siammask_resume:
        assert isfile(
            args.siammask_resume), 'Please download {} first.'.format(
                args.siammask_resume)
        siammask = load_pretrain(siammask, args.siammask_resume)

    siammask.eval().to(device)

    frame_gen = _frame_from_video(video)

    metadata = MetadataCatalog.get(cfg_detectron.DATASETS.TEST[0] if len(
        cfg_detectron.DATASETS.TEST) else "__unused")

    video_visualizer = VideoVisualizer(metadata, instance_mode=ColorMode.IMAGE)

    maxDissapear = 30  # assumed default when the full tracker is used; not given in the snippet
    if detectron_only:
        maxDissapear = 1
    objectTracker = ObjectTracker(maxDissapear)

    frame_idx = 0

    df = pd.DataFrame(columns=[
        'FrameId', 'Id', 'X', 'Y', 'Width', 'Height', 'Confidence', 'ClassId',
        'Visibility'
    ])

    for frame in frame_gen:

        vis_frame, predictions = process_frame(frame, detector)
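
The snippet is cut off after `process_frame`; the MOT-style DataFrame set up above would typically be filled with one row per tracked object per frame. The following is a sketch only, since the internals of `process_frame` and `ObjectTracker` are not shown (the `objects` attribute and box layout are assumptions):

        # Hypothetical per-frame bookkeeping (not the original code):
        for object_id, (x, y, w, h) in getattr(objectTracker, "objects", {}).items():
            df.loc[len(df)] = [frame_idx + 1, object_id, x, y, w, h, 1.0, 1, 1.0]

        frame_idx += 1
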
Пример #29
0
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer_object = VideoVisualizer(self.metadata_object,
                                                  self.instance_mode)
        video_visualizer_keypoint = VideoVisualizer(self.metadata_keypoint,
                                                    self.instance_mode)

        def get_parameters(annos):
            if annos["object_detection"]["pred_boxes"]:
                temp = annos["object_detection"]["pred_boxes"][0]
                obj_det = [1]
                temp = np.asarray(temp)
                temp = temp.flatten()

                key_det = annos["keypoint_detection"]["pred_keypoints"][0]
                key_det = np.asarray(key_det)
                key_det = key_det[0:11, 0:2]
                key_det = np.subtract(key_det, temp[0:2])
                key_det = key_det.flatten()

            else:
                obj_det = [-1]
                obj_det = np.asarray(obj_det)

                key_det = annos["keypoint_detection"]["pred_keypoints"][0]
                key_det = np.asarray(key_det)
                key_det = key_det[0:11, 0:2]
                key_det = key_det.flatten()

            if annos["head_pose_estimation"]["predictions"]:
                hp_est = annos["head_pose_estimation"]["predictions"][0]
                hp_est = np.asarray(hp_est)
            else:
                hp_est = np.asarray([-100, -100, -100])

            anno_list = np.concatenate((obj_det, key_det, hp_est))
            return anno_list

        def process_predictions(frame, predictions_object,
                                predictions_keypoint):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            blank_image = np.zeros((frame.shape[0], frame.shape[1], 3),
                                   np.uint8)

            if "instances" in predictions_object:
                predictions_object = predictions_object["instances"].to(
                    self.cpu_device)
                self.data_json['object_detection'][
                    'pred_boxes'] = predictions_object.get(
                        'pred_boxes').tensor.numpy().tolist()
                self.data_json['object_detection'][
                    'scores'] = predictions_object.get(
                        'scores').numpy().tolist()
                vis_frame = video_visualizer_object.draw_instance_predictions(
                    frame, predictions_object)

            if "instances" in predictions_keypoint:
                predictions_keypoint = predictions_keypoint["instances"].to(
                    self.cpu_device)
                self.data_json['keypoint_detection'][
                    'pred_boxes'] = predictions_keypoint.get(
                        'pred_boxes').tensor.numpy().tolist()
                self.data_json['keypoint_detection'][
                    'scores'] = predictions_keypoint.get(
                        'scores').numpy().tolist()
                self.data_json['keypoint_detection'][
                    'pred_keypoints'] = predictions_keypoint.get(
                        'pred_keypoints').numpy().tolist()
                vis_frame = video_visualizer_keypoint.draw_instance_predictions(
                    vis_frame.get_image(), predictions_keypoint)

            # head pose estimation
            predictions, bounding_box, face_keypoints, w, face_area = head_pose_estimation(
                frame, self.mtcnn, self.head_pose_module, self.transformations,
                self.softmax, self.idx_tensor)
            self.data_json['head_pose_estimation']['predictions'] = predictions
            self.data_json['head_pose_estimation']['pred_boxes'] = bounding_box

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

            for i in range(len(predictions)):
                plot_pose_cube(vis_frame, predictions[i][0], predictions[i][1], predictions[i][2], \
                                tdx = (face_keypoints[i][0] + face_keypoints[i][2]) / 2, \
                                tdy= (face_keypoints[i][1] + face_keypoints[i][3]) / 2, \
                                size = w[i])
                # draw_axis(vis_frame, predictions[i][0], predictions[i][1], predictions[i][2], \
                #                 tdx = (face_keypoints[i][0] + face_keypoints[i][2]) / 2, \
                #                 tdy= (face_keypoints[i][1] + face_keypoints[i][3]) / 2, \
                #                 size = w[i])

            data_json = self.data_json
            self.data_json['frame'] = self.frame_count
            self.frame_count += 1

            inputs_MLP = get_parameters(self.data_json)
            inputs_MLP = torch.from_numpy(inputs_MLP).float().cuda()
            outputs_MLP = self.mlp_model(inputs_MLP)
            predicted_MLP = (outputs_MLP >= 0.5)

            cv2.putText(vis_frame,str(predicted_MLP.item()), (10,700), \
                cv2.FONT_HERSHEY_SIMPLEX, 3, (0,0,0), 10)

            return vis_frame, data_json

        frame_gen = self._frame_from_video(video)

        for frame in frame_gen:

            yield process_predictions(frame, self.predictor_object(frame),
                                      self.predictor_keypoint(frame))
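
For context, `get_parameters` builds a fixed-length feature vector per frame: 1 object-detection flag (1 or -1), 22 values for the first 11 keypoints (x, y) shifted to the box origin when a box exists, and 3 head-pose values (or -100 placeholders), i.e. 26 inputs to `self.mlp_model`. A hypothetical model matching that interface (the real architecture is not shown in the snippet):

import torch.nn as nn

# Hypothetical MLP with the 26-dimensional input assembled by get_parameters;
# a sigmoid output matches the `outputs_MLP >= 0.5` decision used above.
mlp_model = nn.Sequential(
    nn.Linear(26, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
    nn.Sigmoid(),
)
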
Пример #30
0
    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions, tracker):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                tracker.update(boxes=predictions.pred_boxes.tensor.numpy(),
                               labels=predictions.pred_classes.numpy())

                if SAVE_PREDICTIONS:
                    SAVED_PREDICTIONS.append(predictions)
                    if len(SAVED_PREDICTIONS) == 100:
                        with open('predictions.pkl', 'wb') as fp:
                            pickle.dump(SAVED_PREDICTIONS, fp)
                            print('Saving done!')

                vis_frame = draw_instance_predictions(video_visualizer, frame,
                                                      predictions, tracker)

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions, self.tracker)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions, self.tracker)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame),
                                          self.tracker)
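
The last example calls a module-level `draw_instance_predictions(video_visualizer, frame, predictions, tracker)` helper that is not part of detectron2 and is not shown here. A hypothetical sketch of what such a wrapper could look like, overlaying tracker ids on top of the standard visualisation (the `objects` centroid mapping is an assumption):

import cv2
from detectron2.utils.visualizer import VisImage


def draw_instance_predictions(video_visualizer, frame, predictions, tracker):
    """Hypothetical wrapper: standard drawing plus the tracker's object ids."""
    vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
    img = vis_frame.get_image().copy()
    for track_id, (cx, cy) in getattr(tracker, "objects", {}).items():
        cv2.putText(img, str(track_id), (int(cx), int(cy)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
    return VisImage(img)
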