Example #1
class Visualization(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = METADATA
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.
        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.
        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.
        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
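
A minimal driver for the class above might look like the following sketch. The config file, checkpoint path, and score threshold are placeholder assumptions of mine; only the Visualization API itself (run_on_image returning the raw predictions plus a VisImage) comes from the code.

# Hypothetical usage sketch; paths and config values are assumptions, not from the source.
import cv2
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file("configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = "model_final.pkl"        # placeholder checkpoint
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # confidence threshold for instance predictions

demo = Visualization(cfg)
img = cv2.imread("input.jpg")                # OpenCV reads BGR, which run_on_image expects
predictions, vis_output = demo.run_on_image(img)
if vis_output is not None:
    vis_output.save("output.jpg")            # VisImage.save writes the rendered image to disk
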
Example #2
class VideoPredection(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    video = "test_videos/video.mp4"

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)

        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
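
A possible consumer of run_on_video is sketched below: it feeds a cv2.VideoCapture through the class and writes the yielded BGR frames back out with cv2.VideoWriter. The codec, output name, and the already-built cfg are assumptions.

# Hypothetical driver for run_on_video; codec and paths are assumptions.
import cv2

demo = VideoPredection(cfg)                      # cfg assumed to be configured elsewhere
cap = cv2.VideoCapture("test_videos/video.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
writer = cv2.VideoWriter("video_out.mp4",
                         cv2.VideoWriter_fourcc(*"mp4v"),
                         fps, (width, height))

for vis_frame in demo.run_on_video(cap):         # each yielded frame is already BGR
    writer.write(vis_frame)

cap.release()
writer.release()
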
Example #3
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image, path):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.
            path (str): the input image path; its basename is used when saving the
                keypoint visualization.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

                extract_img = vis_output.img
                # draw keypoints
                keypoint_box = instances._fields["pred_keypoints"].numpy(
                ).tolist()
                img_name = path.split("/")[-1]
                if len(keypoint_box) > 0:
                    for idx, keypoint_list in enumerate(keypoint_box):
                        # write() takes a list of (point_1, point_2, point_base) index triples
                        _ = self.write(extract_img, [[11, 15, 13]],
                                       keypoint_list)
                        text_img = self.write(_, [[12, 16, 14]],
                                              keypoint_list)
                    rgb = text_img[..., ::-1]
                    cv2.imwrite(
                        "/home/dooncloud/GitHub/detectron2/output/self_" +
                        img_name, rgb)

                # vis_output = visualizer.draw_instance_predictions(predictions=instances)

        return predictions, vis_output

    def calculate_angle(self, point_1, point_2, point_base):
        vector_a = [point_1[0] - point_base[0], point_1[1] - point_base[1]]
        vector_b = [point_2[0] - point_base[0], point_2[1] - point_base[1]]
        up = np.dot(vector_a, vector_b)
        a = np.linalg.norm(np.array(vector_a))
        b = np.linalg.norm(np.array(vector_b))
        down = a * b
        if down == 0:
            cos = 0.0
        else:
            cos = up / down
        if (abs(cos) > 1):
            cos = 1
        return math.degrees(math.acos(cos))

    def calculate_distance(self, point_1, point_2):
        vector = [point_1[0] - point_2[0], point_1[1] - point_2[1]]
        distance = np.linalg.norm(np.array(vector))
        return distance

    def where_point_write(self, n_list, keypoint_list):
        point_1 = keypoint_list[n_list[0]]
        point_2 = keypoint_list[n_list[1]]
        point_base = keypoint_list[n_list[2]]
        result = self.calculate_angle(point_1, point_2, point_base)
        x, y = point_base[0], point_base[1]
        return result, x, y

    def write(self, img, need_list, keypoint_list):
        if len(need_list) > 0:
            for i in need_list:
                result, x, y = self.where_point_write(i, keypoint_list)
                img = cv2.putText(img, "%.2f" % result, (int(x), int(y)),
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255),
                                  2)
        else:
            img = img
        return img

    def where_distance_write(self, n_list, keypoint_list):
        point_1 = keypoint_list[n_list[0]]
        point_2 = keypoint_list[n_list[1]]
        result = self.calculate_distance(point_1, point_2)
        x, y = point_2[0], point_2[1]
        return result, x, y

    def write_distance(self, img, need_list, keypoint_list):
        if len(need_list) > 0:
            for i in need_list:
                result, x, y = self.where_distance_write(i, keypoint_list)
                img = cv2.putText(img, "%.2f" % result, (int(x), int(y)),
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255),
                                  2)
        else:
            img = img
        return img

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video, dictionary):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.
            dictionary (dict): keypoint-index groups and thresholds used for the angle,
                distance and sit-up judgements (see the sample config after this example).

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions, dictionary):
            resulte = 0
            max_inform_keypoint = None
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)

            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                # find the detection with the largest box
                max_inform_keypoint = self.search_max_box_information(
                    predictions)
                if max_inform_keypoint is not None:
                    # draw the bounding box
                    bbox = max_inform_keypoint[0]
                    frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                          (int(bbox[2]), int(bbox[3])),
                                          (0, 255, 0), 2)
                    # label each keypoint with its index
                    keypoint_list = max_inform_keypoint[1]
                    for i, keypoint in enumerate(keypoint_list):
                        circle_coord = (int(keypoint[0]), int(keypoint[1]))
                        frame = cv2.putText(frame, str(i), circle_coord,
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                            (255, 0, 0), 2)
                    # draw angles
                    frame = self.write(frame, dictionary["angle"],
                                       keypoint_list)
                    # draw distances
                    frame = self.write_distance(frame, dictionary["distance"],
                                                keypoint_list)
                    # judge the sit-up state
                    resulte = self.poll_situp(keypoint_list, dictionary)
                    # save results
                    # save_json = self.save_resulte(keypoint_list,dictionary)

                    vis_frame = frame[..., ::-1]
                else:
                    vis_frame = frame[..., ::-1]

                # vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

            return {
                "vis_frame": vis_frame,
                "resulte": resulte,
                "max_inform_keypoint": max_inform_keypoint
            }

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions, dictionary)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions, dictionary)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame),
                                          dictionary)

    def poll_situp(self, keypoint_list, dictionary):
        ankle_angle_poll = self.angle_poll(dictionary["judge_ankle_angle"],
                                           keypoint_list,
                                           dictionary["require_ankle"])
        up_butt_angle_poll = self.angle_poll(dictionary["judge_butt_angle"],
                                             keypoint_list,
                                             dictionary["require_butt_up"])
        down_butt_angle_poll = self.angle_poll(dictionary["judge_butt_angle"],
                                               keypoint_list,
                                               dictionary["require_butt_down"])
        distance_ratio_poll = self.distance_ratio_poll(
            dictionary["judge_distance_ratio"], keypoint_list,
            dictionary["require_distance_ratio"])

        up_array = ankle_angle_poll + distance_ratio_poll + up_butt_angle_poll
        down_array = down_butt_angle_poll

        return [up_array, down_array]

    def save_resulte(self, keypoint_list, dictionary):
        ankle_num_list = self.calculate_save_angle(
            dictionary["judge_ankle_angle"], keypoint_list)

        with open("digital", "a+") as log_f:
            print(ankle_num_list, file=log_f)

        butt_num_list = self.calculate_save_angle(
            dictionary["judge_butt_angle"], keypoint_list)

        distance_num_list = self.distance_ratio_poll(
            dictionary["judge_distance_ratio"], keypoint_list,
            dictionary["require_distance_ratio"])

    def calculate_save_angle(self, angle_list, keypoint_list):
        resulte = []
        for i in (angle_list):
            point_1 = keypoint_list[i[0]]
            point_2 = keypoint_list[i[1]]
            point_base = keypoint_list[i[2]]
            angle_result = self.calculate_angle(point_1, point_2, point_base)
            resulte.append(angle_result)
        return resulte

    def angle_poll(self, angle_list, keypoint_list, requirement):
        poll = []
        resulte = self.calculate_save_angle(angle_list, keypoint_list)
        for idx, per_resulte in enumerate(resulte):
            if "<" is requirement["need"]:
                if per_resulte < requirement["angle"][idx]:
                    poll.append(1)
                else:
                    poll.append(0)
            elif ">" is requirement["need"]:
                if per_resulte > requirement["angle"][idx]:
                    poll.append(1)
                else:
                    poll.append(0)
            elif "=" is requirement["need"]:
                if per_resulte == requirement["angle"][idx]:
                    poll.append(1)
                else:
                    poll.append(0)
            else:
                raise Exception("calculate_dictionary  请输入正确判断符号")
        return poll

    def calculate_save_distance_ratio(self, distance_list, keypoint_list):
        resulte = []
        for i in (distance_list):
            point_1_1 = keypoint_list[i[0]]
            point_1_2 = keypoint_list[i[1]]
            point_2_1 = keypoint_list[i[2]]
            point_2_2 = keypoint_list[i[3]]

            up_result = self.calculate_distance(point_1_1, point_1_2)
            down_resulte = self.calculate_distance(point_2_1, point_2_2)
            ratio = up_result / down_resulte
            resulte.append(ratio)
        return resulte

    def distance_ratio_poll(self, distance_list, keypoint_list, requirement):
        poll = []
        resulte = self.calculate_save_distance_ratio(distance_list,
                                                     keypoint_list)
        print(resulte)
        for idx, per_resulte in enumerate(resulte):
            if "<" is requirement["need"]:
                if per_resulte < requirement["distance"][idx]:
                    poll.append(1)
                else:
                    poll.append(0)
            elif ">" is requirement["need"]:
                if per_resulte > requirement["distance"][idx]:
                    poll.append(1)
                else:
                    poll.append(0)
            elif "=" is requirement["need"]:
                if per_resulte == requirement["distance"][idx]:
                    poll.append(1)
                else:
                    poll.append(0)
            else:
                raise Exception("calculate_dictionary  请输入正确判断符号")
        print(poll)
        return poll

    def search_max_box_information(self, predictions):
        keypoint_box_area = predictions._fields["pred_boxes"].area().numpy(
        ).tolist()
        keypoint_box_coordinate = predictions._fields[
            "pred_boxes"].tensor.numpy().tolist()
        keypoint_box = predictions._fields["pred_keypoints"].numpy().tolist()
        assert len(keypoint_box_area) == len(keypoint_box_coordinate) == len(
            keypoint_box), "search max box --error"
        if len(keypoint_box_area) == 0:
            return None
        if len(keypoint_box_area) > 1:
            index = keypoint_box_area.index(max(keypoint_box_area))
            return [keypoint_box_coordinate[index], keypoint_box[index]]
        return [keypoint_box_coordinate[0], keypoint_box[0]]
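
The dictionary argument threaded through run_on_video and poll_situp is never defined in this snippet. The sketch below reconstructs only its shape from how the keys are consumed: write() and angle_poll() expect lists of (point_1, point_2, point_base) keypoint-index triples, write_distance() expects index pairs, distance_ratio_poll() expects quadruples, and each require_* entry carries a comparison operator plus per-entry thresholds. Every concrete index and threshold here is an illustrative assumption.

# Illustrative sit-up config; only the key structure is derived from the code above,
# the keypoint indices (COCO order) and thresholds are guesses.
situp_dictionary = {
    "angle": [[11, 15, 13], [12, 16, 14]],            # angle triples drawn by write()
    "distance": [[5, 11]],                            # distance pairs drawn by write_distance()
    "judge_ankle_angle": [[11, 15, 13]],              # triples scored by angle_poll()
    "require_ankle": {"need": "<", "angle": [120.0]},
    "judge_butt_angle": [[5, 13, 11]],
    "require_butt_up": {"need": "<", "angle": [70.0]},
    "require_butt_down": {"need": ">", "angle": [140.0]},
    "judge_distance_ratio": [[0, 13, 11, 13]],        # (p1a, p1b, p2a, p2b) quadruples
    "require_distance_ratio": {"need": "<", "distance": [0.8]},
}
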
Example #4
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        # if "panoptic_seg" in predictions:
        #     panoptic_seg, segments_info = predictions["panoptic_seg"]
        #     vis_output = visualizer.draw_panoptic_seg_predictions(
        #         panoptic_seg.to(self.cpu_device), segments_info
        #     )
        # else:
        #     if "sem_seg" in predictions:
        #         vis_output = visualizer.draw_sem_seg(
        #             predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
        #         )
        if "instances" in predictions:
            instances = predictions["instances"].to(self.cpu_device)
            vis_output = visualizer.draw_instance_predictions(
                predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            tuple: the model predictions (Instances) and the BGR visualization (ndarray)
                of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            # See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
            #   note tensor ==> pytorch.tensor
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # if "panoptic_seg" in predictions:
            #     panoptic_seg, segments_info = predictions["panoptic_seg"]
            #     retval = panoptic_seg # TODO
            #     vis_frame = video_visualizer.draw_panoptic_seg_predictions(
            #         frame, panoptic_seg.to(self.cpu_device), segments_info
            #     )
            # elif "instances" in predictions:
            if "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
                # TODO: grab all these
                # classes = predictions.to(self.cpu_device).pred_classes.numpy()
                # scores = predictions.scores
                # retval = predictions.to(self.cpu_device).pred_boxes.tensor.numpy()
                retval = predictions
            # elif "sem_seg" in predictions:
            #     vis_frame = video_visualizer.draw_sem_seg(
            #         frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
            #     )
            #     retval = predictions["sem_seg"].argmax(dim=0) # TODO

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return retval, vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
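
Unlike the other variants, this run_on_video yields a (predictions, vis_frame) tuple per frame, where predictions is a detectron2 Instances object already moved to the CPU. A possible consumer, assuming cfg and the video source are prepared elsewhere:

# Hypothetical consumer of the (predictions, vis_frame) pairs yielded above.
import cv2

demo = VisualizationDemo(cfg)                # cfg assumed to be configured elsewhere
cap = cv2.VideoCapture("input.mp4")          # placeholder source

for instances, vis_frame in demo.run_on_video(cap):
    boxes = instances.pred_boxes.tensor.numpy() if instances.has("pred_boxes") else None
    scores = instances.scores.numpy() if instances.has("scores") else None
    classes = instances.pred_classes.numpy() if instances.has("pred_classes") else None
    # e.g. hand boxes/scores/classes to a tracker here, then show the visualization
    cv2.imshow("detections", vis_frame)
    if cv2.waitKey(1) == 27:                 # Esc to stop
        break

cap.release()
cv2.destroyAllWindows()
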
Example #5
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)
        self.nextObjectID = 0
        self.objects = OrderedDict()
        self.frame_count = 0
        self.maximum_wait = OrderedDict()
        self.all_track_id = []
        self.count = 0
        self.time_count = 0

    def create_track(self, id):
        self.objects[id] = 1

    def disappear(self, id):
        if id in self.maximum_wait:
            self.maximum_wait[id] += 1
        else:
            self.maximum_wait[id] = 1

    def detrack(self, id, index):
        del self.maximum_wait[id]
        del self.objects[id]
        del self.all_track_id[index]

    def update(self, id):
        self.objects[id] += 1

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # print('=====================>',predictions['instances'].pred_classes)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)

                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()

            if success:
                frame = cv2.resize(frame, (960, 540),
                                   interpolation=cv2.INTER_CUBIC)

                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # k == 1 marks that instance predictions (and person positions) were computed below
            k = 0
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                k = 0
                try:
                    vis_frame, colors = video_visualizer.draw_instance_predictions(
                        frame, predictions)
                    k = 1
                except:
                    vis_frame = video_visualizer.draw_instance_predictions(
                        frame, predictions)
                if k == 1:
                    boxes = predictions.pred_boxes.tensor.numpy(
                    ) if predictions.has("pred_boxes") else None
                    classes = predictions.pred_classes.numpy(
                    ) if predictions.has("pred_classes") else None
                    person_list = []
                    person_track = []
                    for box, class_label, color in zip(boxes, classes, colors):
                        if int(class_label) == 0:
                            pixel_width = box[2] - box[0]
                            # print(box,'=========================>')
                            # print(pixel_width,'============================>')
                            box = np.asarray([[box[0], box[1]],
                                              [box[2], box[3]]])
                            # pixel_per_metric = 15.45
                            # original_width = pixel_width * pixel_per_metric
                            # distance_z = (original_width*3)/pixel_width  #D’ = (W x F) / P
                            distance_z = pixel_width
                            cX = np.average(box[:, 0])
                            cY = np.average(box[:, 1])
                            # cY = cY + distance_z
                            person_list.append([cX, cY, distance_z])
                            person_track.append(color)
                    # print('<=============================>',person_list,'<=============================>')
            #find the center of the box by top-left x and bottom-right x / 2 and same for y

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            # D = dist.cdist(person_list,person_list,'euclidean')
            # print(person_list,D)
            # def midpoint(ptA, ptB):
            #     return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5)
            self.time_count += 1

            vis_frame = frame
            if k == 1:
                person = sorted(zip(person_list, person_track))

                hh, ww, c = (540, 960, 3)
                # hh,ww,c = vis_frame.shape
                # aspect_ratio = 960/540

                # width_scale = (530/960)
                # height_scale = (600/540)
                # result_width = int(vis_frame.shape[1]*width_scale)
                # result_height= int(vis_frame.shape[0]*height_scale)
                # result = np.zeros((result_width,result_height, 3))
                result = np.zeros((530, 600, 3))
                # x_scale = (result_width/vis_frame.shape[1])
                # y_scale = (result_height/vis_frame.shape[0])
                x_scale = (530 / vis_frame.shape[1])
                y_scale = (600 / vis_frame.shape[0])
                ht, wd, cc = result.shape
                # print(ww,wd)
                xx = (ww - wd) // 2
                yy = (hh - ht) // 2
                # print(xx, yy,'.................')
                color = (245, 245, 245)
                layer1 = np.full((hh, ww, cc), color, dtype=np.uint8)

                green_list = []
                yellow_list = []
                red_list = []
                for box_i, track_i in person:
                    for box_j, track_j in person:
                        objectid = str(track_i) + str(track_j)
                        objectid = objectid.replace('[', '').replace(
                            ']', '').replace('.', '').replace(' ', '')
                        if self.time_count % 10:
                            self.time_count = 0
                            for indexs, l in enumerate(self.all_track_id):
                                if l != objectid:
                                    self.disappear(l)
                                    if self.maximum_wait[l] >= 10000:
                                        self.detrack(l, indexs)

                        if box_i != box_j:
                            xA, yA, zA = box_i
                            xB, yB, zB = box_j
                            z_check = abs(zA - zB)
                            D = dist.euclidean((xA, yA), (xB, yB))
                            division_index_A = yA / y_division
                            division_index_B = yB / y_division
                            A_div = division[int(division_index_A)]
                            B_div = division[int(division_index_B)]
                            yA = abs(yA + A_div)
                            yB = abs(yB + B_div)
                            xA = abs(xA + A_div)
                            xB = abs(xB + B_div)

                            if abs(division_index_A - division_index_B) < 1.0:
                                Main_threshold = min(A_div, B_div)
                            else:
                                Main_threshold = 0.4
                            # cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),
                            #             (255,0,0), 2)
                            # def midpoint(ptA, ptB):
                            #     return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5)
                            # (mX, mY) = midpoint((xA, yA), (xB, yB))
                            # cv2.putText(vis_frame, "{:.1f}in".format(D), (int(mX), int(mY - 10)),
                            #             cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,0,0), 2)
                            # # print('.........  ...')
                            if D < Main_threshold:
                                if objectid in self.objects:
                                    self.update(id=objectid)
                                else:
                                    self.all_track_id.append(objectid)
                                    self.create_track(id=objectid)
                                if self.objects[objectid] <= 90:
                                    xA, yA, zA = box_i
                                    xB, yB, zB = box_j
                                    # cv2.circle(vis_frame, (int(xA), int(yA)), 5, (255,0,0), -1)
                                    # cv2.circle(vis_frame, (int(xB), int(yB)), 5, (255,0,0), -1)
                                    # overlay = vis_frame.copy()
                                    cv2.circle(vis_frame, (int(xA), int(yA)),
                                               3, (0, 255, 255), -1)
                                    cv2.circle(vis_frame, (int(xB), int(yB)),
                                               3, (0, 255, 255), -1)
                                    cv2.line(vis_frame, (int(xA), int(yA)),
                                             (int(xB), int(yB)), (255, 255, 0),
                                             2)
                                    if box_i not in red_list and box_i not in yellow_list:
                                        yellow_list.append(box_i)
                                        new_box_i_x = int(
                                            round((box_i[0]) * x_scale))
                                        new_box_i_y = int(
                                            round((box_i[1]) * y_scale))
                                        new_box_j_x = int(
                                            round((box_j[0]) * x_scale))
                                        new_box_j_y = int(
                                            round((box_j[1]) * y_scale))
                                        cv2.line(result, (int(new_box_i_x),
                                                          int(new_box_i_y)),
                                                 (int(new_box_j_x),
                                                  int(new_box_j_y)),
                                                 (255, 255, 0), 2)

                                    # cv2.addWeighted(overlay, 0.1, vis_frame, 1 - 0.,0, vis_frame)

                                else:
                                    xA, yA, zA = box_i
                                    xB, yB, zB = box_j
                                    # overlay = vis_frame.copy()
                                    cv2.circle(vis_frame, (int(xA), int(yA)),
                                               3, (0, 0, 255), -1)
                                    cv2.circle(vis_frame, (int(xB), int(yB)),
                                               3, (0, 0, 255), -1)
                                    cv2.line(vis_frame, (int(xA), int(yA)),
                                             (int(xB), int(yB)), (255, 0, 0),
                                             2)
                                    if box_i not in red_list:
                                        red_list.append(box_i)
                                        new_box_i_x = int(
                                            round((box_i[0]) * x_scale))
                                        new_box_i_y = int(
                                            round((box_i[1]) * y_scale))
                                        new_box_j_x = int(
                                            round((box_j[0]) * x_scale))
                                        new_box_j_y = int(
                                            round((box_j[1]) * y_scale))
                                        cv2.line(result, (int(new_box_i_x),
                                                          int(new_box_i_y)),
                                                 (int(new_box_j_x),
                                                  int(new_box_j_y)),
                                                 (0, 0, 255), 2)

                            else:
                                if box_i not in red_list and box_i not in yellow_list and box_i not in green_list:
                                    green_list.append(box_i)
                                if box_j not in red_list and box_j not in yellow_list and box_j not in green_list:
                                    green_list.append(box_j)
                for box_check, track_check in person:
                    if box_check in red_list:
                        new_box_i_x = int(round((box_check[0]) * x_scale))
                        new_box_i_y = int(round((box_check[1]) * y_scale))
                        # track_i = track_i * 255.0
                        cv2.circle(result, (new_box_i_x, new_box_i_y), 5,
                                   (0, 0, 255), 5)
                    elif box_check in yellow_list:
                        new_box_i_x = int(round((box_check[0]) * x_scale))
                        new_box_i_y = int(round((box_check[1]) * y_scale))
                        # track_i = track_i * 255.0
                        cv2.circle(result, (new_box_i_x, new_box_i_y), 5,
                                   (0, 255, 255), 5)
                    elif box_check in green_list:
                        new_box_i_x = int(round((box_check[0]) * x_scale))
                        new_box_i_y = int(round((box_check[1]) * y_scale))
                        # track_i = track_i * 255.0
                        cv2.circle(result, (new_box_i_x, new_box_i_y), 5,
                                   (0, 128, 0), 5)
                cv2.putText(result, "{:.1f}".format(len(red_list)),
                            (int(20), int(40)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 0, 255), 5)
                cv2.putText(result, "{:.1f}".format(len(yellow_list)),
                            (int(20), int(70)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 255), 5)
                cv2.putText(result, "{:.1f}".format(len(green_list)),
                            (int(20), int(100)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 0), 5)
                # for i in range(1,16):
                #     xA = 1
                #     yA = y_division * i
                #     xB = 700
                #     yB = yA

                #     cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),(255,0,0), 2)

                # print(vis_frame.shape,layer1.shape)
                # cv2.imwrite('imagetest.jpg',layer1)
                vis_frame = cv2.cvtColor(vis_frame, cv2.COLOR_RGB2BGR)
                layer1[yy:yy + ht, xx:xx + wd] = result
                # vis_frame = cv2.resize(vis_frame,(960,540),interpolation = cv2.INTER_CUBIC)
                vis_frame = np.concatenate((vis_frame, layer1), axis=1)

            else:
                vis_frame = cv2.resize(vis_frame, (960, 540),
                                       interpolation=cv2.INTER_CUBIC)
                hh, ww, c = vis_frame.shape
                result = np.zeros((530, 600, 3))
                # x_scale = (result_width/vis_frame.shape[1])
                # y_scale = (result_height/vis_frame.shape[0])
                x_scale = (530 / vis_frame.shape[1])
                y_scale = (600 / vis_frame.shape[0])
                ht, wd, cc = result.shape
                # print(ww,wd)
                xx = (ww - wd) // 2
                yy = (hh - ht) // 2
                # print(xx, yy,'.................')
                color = (245, 245, 245)
                layer1 = np.full((hh, ww, cc), color, dtype=np.uint8)
                layer1[yy:yy + ht, xx:xx + wd] = result
                vis_frame = cv2.resize(vis_frame, (960, 540),
                                       interpolation=cv2.INTER_CUBIC)
                # print(layer1.shape,vis_frame.shape)
                vis_frame = np.concatenate((vis_frame, layer1), axis=1)

                # cv2.addWeighted(overlay, 0.1, vis_frame, 1 - 0.1,0, vis_frame)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
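
The proximity logic in this example relies on module-level division and y_division lookups (apparently a row-dependent distance threshold for perspective) that are not part of the snippet. A stripped-down sketch of the core pairwise check, with a single fixed threshold standing in for that missing lookup:

# Minimal sketch of the pairwise proximity test used above; the fixed threshold is an
# assumption replacing the row-dependent division / y_division lookup not shown here.
from scipy.spatial import distance as dist

def close_pairs(person_list, threshold=100.0):
    """person_list holds [cX, cY, distance_z] entries; returns index pairs closer than threshold."""
    pairs = []
    for i, (xA, yA, _zA) in enumerate(person_list):
        for j, (xB, yB, _zB) in enumerate(person_list):
            if i < j and dist.euclidean((xA, yA), (xB, yB)) < threshold:
                pairs.append((i, j))
    return pairs
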
Example #6
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
        )
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
        if "inst" in predictions:
            visualizer.vis_inst(predictions["inst"])
        if "bases" in predictions:
            self.vis_bases(predictions["bases"])
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info
            )
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def vis_bases(self, bases):
        basis_colors = [[2, 200, 255], [107, 220, 255], [30, 200, 255], [60, 220, 255]]
        bases = bases[0].squeeze()
        bases = (bases / 8).tanh().cpu().numpy()
        num_bases = len(bases)
        fig, axes = plt.subplots(nrows=num_bases // 2, ncols=2)
        for i, basis in enumerate(bases):
            basis = (basis + 1) / 2
            basis = basis / basis.max()
            basis_viz = np.zeros((basis.shape[0], basis.shape[1], 3), dtype=np.uint8)
            basis_viz[:, :, 0] = basis_colors[i][0]
            basis_viz[:, :, 1] = basis_colors[i][1]
            basis_viz[:, :, 2] = np.uint8(basis * 255)
            basis_viz = cv2.cvtColor(basis_viz, cv2.COLOR_HSV2RGB)
            axes[i // 2][i % 2].imshow(basis_viz)
        plt.show()

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info
                )
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
                )

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
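
Every run_on_video above repeats the same bounded read-ahead pattern in its parallel branch: keep the AsyncPredictor primed with up to default_buffer_size frames, then pair each dequeued frame with the next result. A distilled sketch of just that pattern, assuming only that the predictor exposes put(), get() and default_buffer_size:

# Distilled sketch of the buffered producer/consumer pattern used by the parallel branches.
from collections import deque

def buffered_predictions(frame_gen, predictor):
    buffer_size = predictor.default_buffer_size
    frame_data = deque()
    for cnt, frame in enumerate(frame_gen):
        frame_data.append(frame)
        predictor.put(frame)            # submit the frame without blocking on the result
        if cnt >= buffer_size:          # once primed, results start coming back in order
            yield frame_data.popleft(), predictor.get()
    while frame_data:                   # drain frames still in flight
        yield frame_data.popleft(), predictor.get()
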
Example #7
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image, save_name):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.
            save_name (str): identifier used to name the exported translation, rotation,
                mesh, class and score files.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]

        mask = predictions['instances'].raw_masks.squeeze(1).data.cpu().numpy(
        ) if predictions['instances'].has("raw_masks") else None

        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                pred_classes = torch.ones(instances.pred_classes.shape)
                # uncomment to open nms between different classes
                '''
                res = batched_nms(instances.pred_boxes.tensor, instances.scores, pred_classes, 0.5)
                print('res:', res)
                print('res:', res.size()[0])

                #instances.num_instances = res.size()[0]
                instances.pred_boxes.tensor = instances.pred_boxes.tensor[res]
                instances.pred_classes = instances.pred_classes[res]
                instances.scores = instances.scores[res]
                instances.pred_keypoints = instances.pred_keypoints[res]

                instances.predict_trans = instances.predict_trans[res]
                instances.predict_rotation = instances.predict_rotation[res]
                instances.predict_vertices = instances.predict_vertices[res]
                print('pred trans shape:', instances.predict_trans.shape)
                '''

                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

                output_trans_dir = './inference_val_translation/'
                output_rotation_dir = './inference_val_rotation/'
                output_mesh_dir = './inference_val_mesh/'
                output_cls_dir = './inference_val_cls/'
                output_score_dir = './inference_val_score/'

                save_name = save_name.split('/')[1]
                template_path = './merge_mean_car_shape/'
                faces = sr.Mesh.from_obj(template_path +
                                         'merge_mean_car_model_0.obj').faces

                for directory in [
                        output_trans_dir, output_rotation_dir, output_mesh_dir,
                        output_cls_dir, output_score_dir
                ]:
                    if not os.path.exists(directory):
                        os.makedirs(directory)

                for index in range(instances.predict_trans.shape[0]):
                    with open(
                            os.path.join(
                                output_trans_dir,
                                save_name + '_' + str(index) + '.json'),
                            'w') as f:
                        data = {}
                        data['translation'] = list(
                            instances.predict_trans[index].cpu().detach(
                            ).numpy().astype(float))
                        json.dump(data, f)

                for index in range(instances.predict_rotation.shape[0]):
                    with open(
                            os.path.join(
                                output_rotation_dir,
                                save_name + '_' + str(index) + '.json'),
                            'w') as f:
                        data = {}
                        data['rotation'] = list(
                            instances.predict_rotation[index].cpu().detach(
                            ).numpy().astype(float))
                        json.dump(data, f)

                for index in range(instances.pred_classes.shape[0]):
                    with open(
                            os.path.join(
                                output_cls_dir,
                                save_name + '_' + str(index) + '.json'),
                            'w') as f:
                        data = {}
                        data['car_id'] = int(instances.pred_classes[index].cpu(
                        ).detach().numpy().astype(float))
                        json.dump(data, f)

                for index in range(instances.scores.shape[0]):
                    with open(
                            os.path.join(
                                output_score_dir,
                                save_name + '_' + str(index) + '.json'),
                            'w') as f:
                        data = {}
                        data['score'] = float(instances.scores[index].cpu().
                                              detach().numpy().astype(float))
                        json.dump(data, f)

                for index in range(instances.predict_vertices.shape[0]):
                    vertices = instances.predict_vertices[index].unsqueeze(0)
                    sr.Mesh(vertices, faces).save_obj(os.path.join(
                        output_mesh_dir,
                        save_name + '_' + str(index) + '.obj'),
                                                      save_texture=False)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
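
A hedged driver sketch for the run_on_image variant above, which writes per-instance translation/rotation/class/score/mesh files. The helper name, the input/output directories, and the assumption that save_name contains a '/' (the method keeps only the part after the first '/') are illustrative, not part of the original code:

import os

import cv2


def run_folder(demo, in_dir='val_images', out_dir='vis_out'):
    """Run the demo object above over a folder of images (paths are hypothetical)."""
    os.makedirs(out_dir, exist_ok=True)
    for fname in sorted(os.listdir(in_dir)):
        img = cv2.imread(os.path.join(in_dir, fname))      # BGR, as run_on_image expects
        predictions, vis_output = demo.run_on_image(img, in_dir + '/' + fname)
        if vis_output is not None:
            vis_output.save(os.path.join(out_dir, fname))  # VisImage.save writes the rendering
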
class PredictionDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(cnt, frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                return cnt, panoptic_seg, segments_info
            elif "instances" in predictions:
                print("instances")
                predictions = predictions["instances"].to(self.cpu_device)
                return cnt, predictions
            elif "sem_seg" in predictions:
                print("sem_seg")
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
                return cnt, vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()
            for cnt, frame in enumerate(frame_gen):
                # print(cnt,1)
                frame_data.append((cnt, frame))
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame_cnt, frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame_cnt, frame, predictions)

            while len(frame_data):
                frame_cnt, frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame_cnt, frame, predictions)
        else:
            for cnt, frame in enumerate(frame_gen):
                # print("non-parallel prediction",cnt)
                yield process_predictions(cnt, frame, self.predictor(frame))
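
A hedged consumption sketch for PredictionDemo.run_on_video above; unlike the other demos it yields raw prediction tuples rather than rendered frames (the input path and the way the tuples are unpacked are assumptions):

import cv2

video = cv2.VideoCapture('input.mp4')    # hypothetical source file
demo = PredictionDemo(cfg)               # cfg assumed to be a detectron2 CfgNode built elsewhere
for result in demo.run_on_video(video):
    if result is None:
        continue
    cnt, payload = result[0], result[1:]  # (instances,), (panoptic_seg, segments_info) or (vis_frame,)
    print(cnt, [type(p).__name__ for p in payload])
video.release()
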
class VisualizationDemo(object):
    def __init__(self, cfg, parallel, instance_mode=ColorMode.IMAGE):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")

        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel

        if self.parallel == 1:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)
        self.video_visualizer = VideoVisualizer(self.metadata,
                                                self.instance_mode)

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            the result of VideoVisualizer.draw_instance_bbox for each frame,
            or None when the frame has no "instances" predictions.
        """
        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            instances = None
            if "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                instances = self.video_visualizer.draw_instance_bbox(
                    predictions)

            return instances

        frame_gen = self._frame_from_video(video)

        if self.parallel == 1:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
Example #10
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

        return predictions, vis_output

    def run_on_image_detection(self, image):
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image, self.metadata)
        highest = self.highest_only(predictions)
        vis_output = visualizer.draw_instance_predictions(predictions=highest)

        return highest, vis_output

    def highest_only(self, predict):
        instance = predict["instances"].to(self.cpu_device)
        image_size = instance.image_size
        get_scores = instance.get("scores")
        pred_classes_index = []
        if len(get_scores.tolist()) != 0:
            _, highest_index = torch.max(get_scores, 0)
            pred_classes_index.append(highest_index)
        pred_classes = self.tensor_transform(instance.get("pred_classes"),
                                             pred_classes_index)
        scores = self.tensor_transform(instance.get("scores"),
                                       pred_classes_index)
        pred_boxes = Boxes(
            self.tensor_transform(
                instance.get("pred_boxes").tensor, pred_classes_index))
        return Instances(image_size=image_size,
                         pred_boxes=pred_boxes,
                         scores=scores,
                         pred_classes=pred_classes)

    def run_on_image_flaw_only(self, image):
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)

        vis_output = visualizer.draw_instance_predictions(
            predictions=self.flaw_only(predictions))

        return predictions, vis_output

    def flaw_only(self, predict):
        instance = predict["instances"].to(self.cpu_device)
        image_size = instance.image_size
        get_pred_classes = instance.get("pred_classes").numpy()
        pred_classes_index = []
        pred_classes = []
        for c in range(len(get_pred_classes)):
            if get_pred_classes[c] != 0 and get_pred_classes[c] != 1:
                pred_classes_index.append(c)
                pred_classes.append(get_pred_classes[c])
        pred_classes = torch.from_numpy(np.asarray(pred_classes))
        scores = self.tensor_transform(instance.get("scores"),
                                       pred_classes_index)
        pred_masks = self.tensor_transform(instance.get("pred_masks"),
                                           pred_classes_index)
        pred_boxes = Boxes(
            self.tensor_transform(
                instance.get("pred_boxes").tensor, pred_classes_index))
        return Instances(image_size=image_size,
                         pred_boxes=pred_boxes,
                         scores=scores,
                         pred_classes=pred_classes,
                         pred_masks=pred_masks)

    def tensor_transform(self, t, indexes):
        tensor2array = t.numpy()
        new_array = []
        for index in indexes:
            new_array.append(tensor2array[index])
        new_array = torch.from_numpy(np.asarray(new_array))
        return new_array

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
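
A hedged single-image sketch for the detection-only helpers above; run_on_image_detection keeps only the highest-scoring instance via highest_only (the image path and output name are assumptions):

import cv2

demo = VisualizationDemo(cfg)                    # cfg assumed to be a detectron2 CfgNode built elsewhere
img = cv2.imread('sample.jpg')                   # hypothetical BGR input
highest, vis = demo.run_on_image_detection(img)  # Instances with at most one detection
print(len(highest), 'instance(s) kept')
vis.save('sample_top1.jpg')                      # VisImage.save writes the rendered image
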
Example #11
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        # self.metadata = MetadataCatalog.get(
        #     cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
        # )

        # a = get_object_dicts("box/train")
        d = "train"
        # DatasetCatalog.register("box_" + d, lambda d=d: get_object_dicts("box/" + d))
        DatasetCatalog.register("box", self.fake_func)

        MetadataCatalog.get("box_" + d).thing_classes = ['box']
        self.metadata = MetadataCatalog.get("box")

        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def fake_func(self):
        return {}

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, zed, image, runtime_parameters):
        while True:
            if zed.grab(runtime_parameters) == sl.ERROR_CODE.SUCCESS:
                zed.retrieve_image(image, sl.VIEW.LEFT)
                frame_gen = image.get_data()
                frame_gen = cv2.cvtColor(frame_gen, cv2.COLOR_RGB2BGR)
                yield frame_gen
            else:
                break

    def _depth_from_video(self, zed, depth, runtime_parameters):
        while True:
            if zed.grab(runtime_parameters) == sl.ERROR_CODE.SUCCESS:
                zed.retrieve_measure(depth, sl.MEASURE.DEPTH)
                depth_gen = depth.get_data()
                yield depth_gen
            else:
                break

    def _frame_depth_from_video(self, pipeline, pc):
        while True:
            frames = pipeline.wait_for_frames()
            depth_frame = frames.get_depth_frame()
            color_frame = frames.get_color_frame()
            if not depth_frame or not color_frame:
                continue

            # Convert images to numpy arrays
            depth_map = np.asanyarray(depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())
            points = pc.calculate(depth_frame)
            v = points.get_vertices()
            point_cloud = np.asanyarray(v).view(np.float32).reshape(
                480, 640, 3)

            depth_colormap = cv2.applyColorMap(
                cv2.convertScaleAbs(depth_map, alpha=0.03), cv2.COLORMAP_JET)

            depth_colormap_dim = depth_colormap.shape
            color_colormap_dim = color_image.shape

            # If depth and color resolutions are different, resize color image to match depth image for display
            if depth_colormap_dim != color_colormap_dim:
                color_image = cv2.resize(color_image,
                                         dsize=(depth_colormap_dim[1],
                                                depth_colormap_dim[0]),
                                         interpolation=cv2.INTER_AREA)

            yield [color_image, depth_map, point_cloud]

    def run_on_video(self, pipeline, pc):
        """
        Visualizes predictions on frames of the input video.

        Args:
            pipeline: a started depth-camera pipeline (e.g. a pyrealsense2 pipeline)
                yielding synchronized color and depth frames.
            pc: a point-cloud calculator (e.g. rs.pointcloud()) used to compute
                per-frame vertices.

        Yields:
            ndarray: RGB visualizations of each video frame (also written to
            dummy_data/image_seg.jpg).
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, point_cloud, depth, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # cv2.imwrite('image_raw.jpg', frame)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)

                if predictions.has("pred_masks"):
                    masks = predictions.pred_masks

                frame_visualizer = Visualizer(frame, self.metadata)
                mask_layer = frame_visualizer.get_mask_layer(masks=masks)
                # vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)

                # depth_layer = []
                # start_time = time.time()
                # for i in range(len(mask_layer)):
                #     mask = mask_layer[i].mask
                #     for y in range(len(mask)):
                #         concate_depth = mask[y]*depth[y]
                #         # concate_depth = np.setdiff1d(concate_depth,np.array([float('nan')]))
                #         # concate_depth = np.nan_to_num(concate_depth)
                #         depth_layer.append(concate_depth)
                #     # f =  open('dummy_data/depth_map_{}.npy'.format(datetime.now().second), 'wb')
                #     f =  open('dummy_data/depth_map.npy', 'wb')
                #     np.save(f, depth_layer)
                #     np.save(f, point_cloud)
                #     np.save(f, mask)
                #     f.close()
                # end_time = time.time()
                # print('elapse time = ', end_time - start_time)
                # print('depth_layer ready ')

            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # import numpy as np
            # import matplotlib.pyplot as plt

            # ax = plt.axes(projection='3d')

            # ax.scatter3D(np.array(point_cloud_layer)[:,0], np.array(point_cloud_layer)[:,1], np.array(point_cloud_layer)[:,2], cmap='Greens', s=0.5)
            # plt.show()

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = vis_frame.get_image()
            # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            cv2.imwrite('dummy_data/image_seg.jpg', vis_frame)
            return vis_frame

        # frame_gen = self._frame_from_video(zed, image, runtime_parameters)
        # depth_gen = self._depth_from_video(zed, depth, runtime_parameters)
        data_gen = self._frame_depth_from_video(pipeline, pc)

        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, (frame, depth, point_cloud) in enumerate(data_gen):
                frame_data.append((frame, depth, point_cloud))
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame, depth, point_cloud = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, point_cloud, depth,
                                              predictions)

            while len(frame_data):
                frame, depth, point_cloud = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, point_cloud, depth,
                                          predictions)
        else:
            for frame, depth, point_cloud in data_gen:
                # if cam_data.dtype == 'uint8':
                #     frame = cam_data
                # else:
                #     depth = cam_data

                yield process_predictions(frame, point_cloud, depth,
                                          self.predictor(frame))
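
The _frame_depth_from_video generator above reads from what looks like an Intel RealSense pipeline. A minimal, hedged setup sketch using pyrealsense2, assuming 640x480 streams so the point cloud can be reshaped to (480, 640, 3) as in the generator:

import pyrealsense2 as rs

pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)   # depth stream
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)  # color stream
pipeline.start(config)
pc = rs.pointcloud()   # passed as the second argument to run_on_video above

# demo = VisualizationDemo(cfg)   # cfg assumed to be a detectron2 CfgNode built elsewhere
# for vis_frame in demo.run_on_video(pipeline, pc):
#     ...                         # each frame is also written to dummy_data/image_seg.jpg
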
Example #12
class Predictor(DefaultPredictor):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        DatasetCatalog.register("pfallcnt_pred", lambda d: [])
        MetadataCatalog.get("pfallcnt_pred").set(thing_classes=["0", "1"],
                                                 thing_colors=[(0, 255, 0),
                                                               (255, 0, 0)])
        self.metadata = MetadataCatalog.get("pfallcnt_pred")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "instances" in predictions:
            predictions['instances'] = predictions['instances'].to('cpu')
            indices = predictions['instances'].pred_classes == 1
            predictions['instances'] = predictions['instances'][indices]
            # if(len(predictions['instances']) == 0):
            #     vis_output = image
            # else:
            vis_output = visualizer.draw_instance_predictions(
                predictions=predictions['instances'])
        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "instances" in predictions:
                predictions['instances'] = predictions['instances'].to('cpu')
                indices = predictions['instances'].pred_classes == 1
                predictions['instances'] = predictions['instances'][indices]
                if (len(predictions['instances']) == 0):
                    vis_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                else:
                    vis_frame = video_visualizer.draw_instance_predictions(
                        frame, predictions['instances'])
                    # Converts Matplotlib RGB format to OpenCV BGR format
                    vis_frame = cv2.cvtColor(vis_frame.get_image(),
                                             cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
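
A small self-contained sketch, with dummy values, of the boolean-mask filtering the Predictor above uses to keep only class-1 detections (the field names follow detectron2's Instances API):

import torch
from detectron2.structures import Boxes, Instances

inst = Instances((480, 640),
                 pred_classes=torch.tensor([0, 1, 1]),
                 scores=torch.tensor([0.9, 0.8, 0.7]),
                 pred_boxes=Boxes(torch.tensor([[0., 0., 10., 10.],
                                                [5., 5., 20., 20.],
                                                [8., 8., 30., 30.]])))
kept = inst[inst.pred_classes == 1]   # boolean indexing keeps only the class-1 rows
print(len(kept))                      # -> 2
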
Example #13
class VisualizationDemo(object):
    def __init__(self, cfg, args, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.draw_proposals = args.draw_proposals
        self.thresh = args.confidence_threshold
        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)
        
        self._init_visualization_metadata(cfg, args)
        
    def _init_visualization_metadata(self, cfg, args):
        """
        Initialize visualizer.
        Args:
            cfg (CfgNode)
        """
        self.metadata = create_visualization_metadata(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.
        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = InteractionVisualizer(image, self.metadata, instance_mode=self.instance_mode)
        if self.draw_proposals:
            instances = predictions["proposals"].to(self.cpu_device)
            vis_output = visualizer.draw_proposals(proposals=instances)
        elif "hoi_instances" in predictions:
            instances = predictions["hoi_instances"].to(self.cpu_device)
            instances = self._convert_hoi_instances(instances)
            vis_output = visualizer.draw_interaction_predictions(predictions=instances)
        elif "box_instances" in predictions:
            instances = predictions["box_instances"].to(self.cpu_device)
            vis_output = visualizer.draw_instance_predictions(predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.
        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.
        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if self.draw_proposals:
                instances = predictions["proposals"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_proposals(frame, instances, self.thresh)
            elif "hoi_instances" in predictions:
                instances = predictions["hoi_instances"].to(self.cpu_device)
                instances = self._convert_hoi_instances(instances)               
                vis_frame = video_visualizer.draw_interaction_predictions(frame, instances)
            elif "box_instances" in predictions:
                instances = predictions["box_instances"].to(self.cpu_device)
                instances = self._convert_hoi_instances(instances)               
                vis_frame = video_visualizer.draw_instance_predictions(frame, instances)

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
                
    def _convert_hoi_instances(self, instances):
        """
        Convert an "Instances" object to a HOI "Instances" by merging the predicted
        object class and action class to an interaction class.
        For example, object ("bench") + action ("sit on") -> interaction ("sit on bench")
        """
        num_instance = len(instances)
        if num_instance == 0:
            return instances
        # Meta data
        interaction_to_contiguous_id = self.metadata.get("interaction_to_contiguous_id", None)
        
        if interaction_to_contiguous_id:
            action_classes = self.metadata.get("action_classes", None)
            thing_classes = self.metadata.get("thing_classes", None)
            known_classes = self.metadata.get("known_classes", None)
            novel_classes = np.setdiff1d(thing_classes, known_classes).tolist()
        
            pred_object_classes = instances.object_classes.tolist()
            pred_action_classes = instances.action_classes.tolist()

            interaction_classes = []
            keep = []
            for ix in range(num_instance):
                object_id = pred_object_classes[ix]
                action_id = pred_action_classes[ix]
                # append detection results
                pred_action_name = action_classes[action_id]
                pred_object_name = thing_classes[object_id]
                pred_interaction_name = pred_action_name + " " + pred_object_name
                if pred_interaction_name in interaction_to_contiguous_id:
                    #interaction_id = interaction_to_contiguous_id[pred_interaction_name]
                    interaction_classes.append(pred_interaction_name)
                    keep.append(ix)
                elif pred_object_name in novel_classes:
                    # TODO: mine valid interaction with novel objects using external source.
                    # Interactions with novel object categories
                    interaction_classes.append(pred_interaction_name)
                    keep.append(ix)

            instances = instances[keep]
            instances.pred_classes = np.asarray(interaction_classes)
        return instances
Example #14
class VisualizationDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def bbox_crop(self, frame, predictions, resx, resy, target_size,
                  target_instance, padding):
        predictions2 = predictions["instances"].to(torch.device("cpu"))
        boxes = predictions2.pred_boxes
        scores = predictions2.scores
        classes = predictions2.pred_classes
        things = self.metadata.get("thing_classes", None)

        people_list = list(
            idx for idx, value in enumerate(classes)
            if things[value] == target_instance and scores[idx] >= 0.9)

        # def calc_spread(val, offset, ceiling):
        #     if val + offset > ceiling:
        if len(people_list) > 0:
            max_score, max_index = scores.max(0)
            max_bbox = boxes[int(max_index)]
            bbox = max_bbox.tensor.tolist()[0]
            # size_x, size_y = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])

            minx, miny, maxx, maxy = max_bbox.tensor.tolist()[0]
            bbox_width, bbox_height = (maxx - minx), (maxy - miny)

            # Offset the image to make it square and add in padding
            if bbox_width > bbox_height:
                offset = (bbox_width - bbox_height)
                maxy += offset / 2
                miny -= offset / 2
            else:
                offset = (bbox_height - bbox_width)
                maxx += offset / 2
                minx -= offset / 2

            maxy += padding
            maxx += padding
            minx -= padding
            miny -= padding

            # Try to adjust image to make it valid
            if minx < 0:
                shift = abs(minx)
                minx += shift
                maxx += shift
            if maxx > resx:
                shift = maxx - resx
                minx -= shift
                maxx -= shift
            if miny < 0:
                shift = abs(miny)
                miny += shift
                maxy += shift
            if maxy > resy:
                shift = maxy - resy
                miny -= shift
                maxy -= shift

            if (minx < 0 or maxx > resx or miny < 0 or maxy > resy):
                return None
            else:
                bbox = (minx, miny, maxx, maxy)
                cropped_img = Image.fromarray(
                    frame[:, :, ::-1]).crop(bbox).resize(
                        (target_size, target_size), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
                return cropped_img
        else:
            return None

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video, width, height, target_size, padding):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.
            width (int): frame width of the video source.
            height (int): frame height of the video source.
            target_size (int): side length of the square crop returned for each frame.
            padding (int): extra pixels added around the detected box before cropping.

        Yields:
            PIL.Image.Image or None: a square crop around the highest-scoring
            "person" detection in each frame, or None if no valid crop exists.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield self.bbox_crop(frame, predictions, width, height,
                                         target_size, "person", padding)
                    # yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield self.bbox_crop(frame, predictions, width, height,
                                     target_size, "person", padding)
                # yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield self.bbox_crop(frame, self.predictor(frame), width,
                                     height, target_size, "person", padding)
Example #15
class VisualizationDemo(object):
    def __init__(self,
                 cfg,
                 arg_metadata=None,
                 instance_mode=ColorMode.IMAGE,
                 parallel=False):
        """
        Args:
            cfg (CfgNode):
            arg_metadata (Metadata): metadata passed as a Metadata object (not in JSON format)
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        print("According to the config we have " +
              str(cfg.MODEL.ROI_HEADS.NUM_CLASSES) + " classes.")

        # I need to add this metadata handling according to https://github.com/facebookresearch/detectron2/issues/326 and https://github.com/facebookresearch/detectron2/issues/101
        if (arg_metadata is None):  #default value for COCO
            self.metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
            print("I use the the default metadata which is:")
            print(MetadataCatalog.get(cfg.DATASETS.TEST[0]))
            #cfg.DATASETS.TRAIN is ('coco_2017_train',)
            #cfg.DATASETS.TEST[0] is coco_2017_val
            #MetadataCatalog.get(cfg.DATASETS.TEST[0]) is Metadata(evaluator_type='coco', image_root='datasets/coco/val2017', json_file='datasets/coco/annotations/instances_val2017.json', name='coco_2017_val', thing_classes=['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'], thing_colors=[[220, 20, 60], [119, 11, 32], [0, 0, 142], [0, 0, 230], [106, 0, 228], [0, 60, 100], [0, 80, 100], [0, 0, 70], [0, 0, 192], [250, 170, 30], [100, 170, 30], [220, 220, 0], [175, 116, 175], [250, 0, 30], [165, 42, 42], [255, 77, 255], [0, 226, 252], [182, 182, 255], [0, 82, 0], [120, 166, 157], [110, 76, 0], [174, 57, 255], [199, 100, 0], [72, 0, 118], [255, 179, 240], [0, 125, 92], [209, 0, 151], [188, 208, 182], [0, 220, 176], [255, 99, 164], [92, 0, 73], [133, 129, 255], [78, 180, 255], [0, 228, 0], [174, 255, 243], [45, 89, 255], [134, 134, 103], [145, 148, 174], [255, 208, 186], [197, 226, 255], [171, 134, 1], [109, 63, 54], [207, 138, 255], [151, 0, 95], [9, 80, 61], [84, 105, 51], [74, 65, 105], [166, 196, 102], [208, 195, 210], [255, 109, 65], [0, 143, 149], [179, 0, 194], [209, 99, 106], [5, 121, 0], [227, 255, 205], [147, 186, 208], [153, 69, 1], [3, 95, 161], [163, 255, 0], [119, 0, 170], [0, 182, 199], [0, 165, 120], [183, 130, 88], [95, 32, 0], [130, 114, 135], [110, 129, 133], [166, 74, 118], [219, 142, 185], [79, 210, 114], [178, 90, 62], [65, 70, 15], [127, 167, 115], [59, 105, 106], [142, 108, 45], [196, 172, 0], [95, 54, 80], [128, 76, 255], [201, 57, 1], [246, 0, 122], [191, 162, 208]], thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 13: 11, 14: 12, 15: 13, 16: 14, 17: 15, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21, 24: 22, 25: 23, 27: 24, 28: 25, 31: 26, 32: 27, 33: 28, 34: 29, 35: 30, 36: 31, 37: 32, 38: 33, 39: 34, 40: 35, 41: 36, 42: 37, 43: 38, 44: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 52: 46, 53: 47, 54: 48, 55: 49, 56: 50, 57: 51, 58: 52, 59: 53, 60: 54, 61: 55, 62: 56, 63: 57, 64: 58, 65: 59, 67: 60, 70: 61, 72: 62, 73: 63, 74: 64, 75: 65, 76: 66, 77: 67, 78: 68, 79: 69, 80: 70, 81: 71, 82: 72, 84: 73, 85: 74, 86: 75, 87: 76, 88: 77, 89: 78, 90: 79})
        else:  # custom metadata: this is my adaptation so we can use our own classes for a trained model in demo.py
            self.metadata = arg_metadata  # this is a Metadata object that already includes name, thing_classes, etc.
            print("I use the given metadata, which is:")
            print(self.metadata)
            #self.metadata is Metadata(name='Custom_Audi_A2D2_Dataset_Training', thing_classes=['Animal', 'Bicycle', 'Bus', 'Car', 'Cyclist', 'EmergencyVehicle', 'MotorBiker', 'Motorcycle', 'Pedestrian', 'Truck', 'UtilityVehicle', 'VanSUV', 'Misc'])

        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.
        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)
                vis_output = visualizer.draw_instance_predictions(
                    predictions=instances)

        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.
        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.
        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
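
A hedged sketch of building the arg_metadata argument accepted above, using detectron2's MetadataCatalog; the dataset name and class list are placeholders:

from detectron2.data import MetadataCatalog

custom_metadata = MetadataCatalog.get("my_custom_dataset").set(
    thing_classes=['Animal', 'Bicycle', 'Car'])              # hypothetical class list
demo = VisualizationDemo(cfg, arg_metadata=custom_metadata)  # cfg assumed to be a detectron2 CfgNode built elsewhere
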
Example #16
class VisualizationDemo(object):
    def __init__(self,
                 cfg,
                 debug,
                 instance_mode=ColorMode.IMAGE,
                 parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode

        self.parallel = parallel
        if parallel:
            if debug:
                print('using parallel in function predictor.py')
            num_gpu = torch.cuda.device_count()
            if debug:
                print('num_gpu : ', num_gpu)
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            if debug:
                print('not using parallel in function predictor.py')
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image, debug):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        obj = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image,
                                self.metadata,
                                instance_mode=self.instance_mode)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_output = visualizer.draw_panoptic_seg_predictions(
                panoptic_seg.to(self.cpu_device), segments_info)
            if debug:
                print('in panoptic_seg')
        else:
            if "sem_seg" in predictions:
                vis_output = visualizer.draw_sem_seg(
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
                if debug:
                    print("in sem_seg")
            if "instances" in predictions:
                instances = predictions["instances"].to(self.cpu_device)

                if debug:
                    vis_output = visualizer.draw_instance_predictions(
                        predictions=instances)
                    print('in instances')

                # If the output is JSON, debug is False.
                if not debug:
                    boxes = (instances.pred_boxes.tensor.numpy()
                             if instances.has("pred_boxes") else None)
                    scores = instances.scores if instances.has("scores") else None
                    classes = (instances.pred_classes
                               if instances.has("pred_classes") else None)
                    labels = _create_text_labels(
                        classes, scores,
                        visualizer.metadata.get("thing_classes", None))
                    keypoints = (instances.pred_keypoints
                                 if instances.has("pred_keypoints") else None)

                    if instances.has("pred_masks"):
                        masks = np.asarray(instances.pred_masks)
                        masks = [
                            GenericMask(x, visualizer.output.height,
                                        visualizer.output.width) for x in masks
                        ]
                    else:
                        masks = None

                    obj = {}
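                    # Build a JSON-serializable summary: one entry per detected
                    # instance with its class label, score, box corners and,
                    # when masks are available, mask polygons.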

                    for i, _ in enumerate(labels):
                        tmp = {}
                        split = labels[i].split()
                        tmp['class'] = split[0]
                        tmp['score'] = scores[i].item()
                        tmp['box'] = {}
                        tmp['box']['left-up'] = [
                            boxes[i][0].item(), boxes[i][1].item()
                        ]
                        tmp['box']['right-down'] = [
                            boxes[i][2].item(), boxes[i][3].item()
                        ]
                        tmp['polygons'] = {}

                        if masks is not None:
                            for idx, segment in enumerate(masks[i].polygons):
                                tmp['polygons'][idx] = segment.reshape(
                                    -1, 2).tolist()

                        obj[i] = tmp

        return predictions, vis_output, obj

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            # Convert the OpenCV BGR frame to RGB for the visualizer.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size

            frame_data = deque()

            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)

                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)

            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
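
A minimal usage sketch for VisualizationDemo above, assuming a standard detectron2 model-zoo config. The config file, weights, score threshold, and image path below are placeholders/assumptions, not part of the original snippet.

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg

# Hypothetical config: any instance-segmentation model from the model zoo.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.freeze()

demo = VisualizationDemo(cfg, debug=False)
image = cv2.imread("input.jpg")  # BGR, as run_on_image expects
predictions, vis_output, obj = demo.run_on_image(image, debug=False)

# With debug=False and an instance-segmentation model, `obj` holds the
# per-instance summary built above; vis_output stays None in that branch.
print(obj)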