Example #1
def fire_detection(source, video_path, output, fps, frame_size):
    """
    Deep-learning-based fire and smoke detector
    """
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            fire_in = device.getInputQueue("fire_in")
        else:
            cam_out = device.getOutputQueue("rgb")

        fire_nn = device.getOutputQueue("fire_nn")

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        label_map = ["fire", "normal", "smoke"]
        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, fire_in, 224, 224)
            fire_data = toTensorResult(fire_nn.get()).get("final_result")
            fps_handler.tick("fire")
            conf = np.max(fire_data)
            if conf > 0.5:
                label = label_map[np.argmax(fire_data)]

                drawText(frame_debug, f"{label}", (10, 30), "black")
                drawText(frame_debug, f"{conf:.2%}", (10, 50), "black")
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
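The helpers used above (create_pipeline, run_nn, toTensorResult, drawText, FPSHandler, getDeviceInfo) come from the surrounding project and are not shown in this listing. As a rough illustration of the send-to-device pattern, here is a minimal sketch of what run_nn might look like for the argument order used in this example; the ImgFrame type and planar layout are assumptions, not the project's actual code:

import cv2
import depthai as dai
import numpy as np


def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
    # Resize to the network input size and reorder HWC -> CHW (planar layout).
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()


def run_nn(frame: np.ndarray, in_queue: dai.DataInputQueue, width: int, height: int) -> None:
    # Wrap the host frame in an ImgFrame and push it to the device-side NN input queue.
    img = dai.ImgFrame()
    img.setType(dai.ImgFrame.Type.BGR888p)
    img.setData(to_planar(frame, (width, height)))
    img.setWidth(width)
    img.setHeight(height)
    in_queue.send(img)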
Example #2
def blink(source, video_path, output, fps, frame_size):
    """
    Detect and count eye blinks in a video stream in real time
    """
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            mesh_in = device.getInputQueue("mesh_in")
        else:
            cam_out = device.getOutputQueue("rgb")

        mesh_nn = device.getOutputQueue("mesh_nn")

        eye_in = device.getInputQueue("eye_in")
        eye_nn = device.getOutputQueue("eye_nn")
        left_eye_blink = []
        right_eye_blink = []

        left_number_of_blinks = 0
        right_number_of_blinks = 0

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, mesh_in, 192, 192)
            mesh_data = toTensorResult(mesh_nn.get())
            fps_handler.tick("Mesh")

            score = mesh_data.get("conv2d_31").reshape((1, ))
            if score > 0.5:
                mesh = mesh_data.get("conv2d_21").reshape((468, 3))

                wh = np.array(frame_debug.shape[:2])[::-1]
                mesh *= np.array([*wh / 192, 1])

                for v in mesh.astype(int):
                    cv2.circle(frame_debug, v[:2], 1, (191, 255, 0))

                left_eye, left_box = get_mini_box_frame(
                    frame_debug,
                    np.array([mesh[71], mesh[107], mesh[116], mesh[197]]),
                )

                run_nn(left_eye, eye_in, 32, 32)
                left_eye_blink.append(
                    np.argmax(toTensorResult(eye_nn.get()).get("19")))

                if (len(left_eye_blink) > 5
                        and left_eye_blink[-1] not in left_eye_blink[-5:-1]):
                    left_number_of_blinks += 1

                if len(left_eye_blink) > 20:
                    del left_eye_blink[0]

                right_eye, right_box = get_mini_box_frame(
                    frame_debug,
                    np.array([mesh[301], mesh[336], mesh[345], mesh[197]]),
                )

                run_nn(right_eye, eye_in, 32, 32)
                right_eye_blink.append(
                    np.argmax(toTensorResult(eye_nn.get()).get("19")))
                if (len(right_eye_blink) > 5
                        and right_eye_blink[-1] not in right_eye_blink[-5:-1]):
                    right_number_of_blinks += 1

                if len(right_eye_blink) > 20:
                    del right_eye_blink[0]

                cv2.drawContours(frame_debug, [right_box], -1, (0, 139, 0), 2)
                cv2.drawContours(frame_debug, [left_box], -1, (0, 139, 0), 2)
                cv2.imshow("left", left_eye)
                cv2.imshow("right", right_eye)

            drawText(
                frame_debug,
                f"NumberOfBlinksOfRightEye: {right_number_of_blinks}",
                (20, 50),
                color="red",
            )
            drawText(
                frame_debug,
                f"NumberOfBlinksOfLeftEye: {left_number_of_blinks}",
                (20, 80),
                color="red",
            )

            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
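get_mini_box_frame is another project helper: it crops a rotated eye patch framed by four mesh landmarks and returns the box corners used for drawContours. A minimal sketch of one plausible implementation (the exact cropping strategy is an assumption):

import cv2
import numpy as np


def get_mini_box_frame(frame, points):
    # Fit a rotated minimum-area rectangle around the landmark points.
    rect = cv2.minAreaRect(points[:, :2].astype(np.float32))
    box = cv2.boxPoints(rect).astype(int)  # 4 corners, usable with cv2.drawContours
    center, size, angle = rect
    size = (max(int(size[0]), 1), max(int(size[1]), 1))
    # Rotate the whole frame so the rectangle becomes axis-aligned, then crop it.
    rot = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(frame, rot, (frame.shape[1], frame.shape[0]))
    crop = cv2.getRectSubPix(rotated, size, center)
    return crop, box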
Example #3
class Main:
    def __init__(self):
        print("Loading pipeline...")
        self.start_pipeline()
        if not camera:
            self.cap = cv2.VideoCapture(
                str(Path(args.video).resolve().absolute()))
        self.fps_handler = FPSHandler()
        self.frame = None
        self.face_box_q = queue.Queue()
        self.bboxes = []
        self.running = True
        self.results = queue.Queue()
        self.face_bboxs = queue.Queue()

    def start_pipeline(self):
        print("Starting pipeline...")
        self.device = depthai.Device(create_pipeline())
        if camera:
            self.cam_out = self.device.getOutputQueue("cam_out", 1, False)
        else:
            self.face_in = self.device.getInputQueue("face_in")

    def draw_bbox(self, bbox, color):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      color, 2)

    def run_face(self):
        face_nn = self.device.getOutputQueue("face_nn")
        land68_in = self.device.getInputQueue("land68_in", 4, False)
        while self.running:
            if self.frame is None:
                continue
            bboxes = np.array(face_nn.get().getFirstLayerFp16())
            bboxes = bboxes.reshape((bboxes.size // 7, 7))
            self.bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]
            for raw_bbox in self.bboxes:
                bbox = frame_norm(self.frame, raw_bbox)
                det_frame = self.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                land68_data = depthai.NNData()
                land68_data.setLayer("data", to_planar(det_frame, (160, 160)))
                land68_in.send(land68_data)
                self.face_box_q.put(bbox)

    def run_land68(self):
        land68_nn = self.device.getOutputQueue("land68_nn", 4, False)

        while self.running:
            self.results = queue.Queue()
            self.face_bboxs = queue.Queue()
            while len(self.bboxes):
                face_bbox = self.face_box_q.get()
                face_bbox[1] = max(face_bbox[1] - 15, 0)
                face_bbox[3] += 15
                face_bbox[0] = max(face_bbox[0] - 15, 0)
                face_bbox[2] += 15
                self.face_bboxs.put(face_bbox)
                face_frame = self.frame[face_bbox[1]:face_bbox[3],
                                        face_bbox[0]:face_bbox[2]]
                land68_data = land68_nn.get()
                out = to_tensor_result(land68_data).get(
                    "StatefulPartitionedCall/strided_slice_2/Split.0")
                result = coordinate(face_frame, *out)
                self.results.put(result)

    def should_run(self):
        return True if camera else self.cap.isOpened()

    def get_frame(self, retries=0):
        if camera:
            rgb_output = self.cam_out.get()
            return True, np.array(rgb_output.getData()).reshape(
                (3, rgb_output.getHeight(),
                 rgb_output.getWidth())).transpose(1, 2, 0).astype(np.uint8)
        else:
            read_correctly, new_frame = self.cap.read()
            if not read_correctly or new_frame is None:
                if retries < 5:
                    return self.get_frame(retries + 1)
                else:
                    print("Source closed, terminating...")
                    return False, None
            else:
                return read_correctly, new_frame

    def run(self):
        self.threads = [
            threading.Thread(target=self.run_face, daemon=True),
            threading.Thread(target=self.run_land68, daemon=True),
        ]
        for thread in self.threads:
            thread.start()
        while self.should_run():
            read_correctly, new_frame = self.get_frame()
            if not read_correctly:
                break
            self.fps_handler.tick("Frame")
            self.frame = new_frame
            self.debug_frame = self.frame.copy()
            if not camera:
                nn_data = depthai.NNData()
                nn_data.setLayer("data", to_planar(self.frame, (300, 300)))
                self.face_in.send(nn_data)
            if debug:
                if self.results.qsize() > 0 and self.face_bboxs.qsize() > 0:
                    try:
                        for i in range(self.results.qsize()):
                            face_bbox = self.face_bboxs.get()
                            result = self.results.get()
                            bbox = frame_norm(self.frame, self.bboxes[i])
                            self.draw_bbox(bbox, (0, 255, 0))
                            self.hand_points = []
                            # Landmark indices used for head-pose estimation:
                            # 17/21 left eyebrow corners, 22/26 right eyebrow corners,
                            # 36/39 left eye corners, 42/45 right eye corners,
                            # 31/35 nose corners, 48/54 mouth corners,
                            # 57 lower mouth center, 8 chin.
                            # Each landmark is an (x, y) pair, so its index is doubled
                            # when indexing into the flat result array.
                            self.hand_points.append(
                                (result[34] + face_bbox[0],
                                 result[35] + face_bbox[1]))
                            self.hand_points.append(
                                (result[42] + face_bbox[0],
                                 result[43] + face_bbox[1]))
                            self.hand_points.append(
                                (result[44] + face_bbox[0],
                                 result[45] + face_bbox[1]))
                            self.hand_points.append(
                                (result[52] + face_bbox[0],
                                 result[53] + face_bbox[1]))
                            self.hand_points.append(
                                (result[72] + face_bbox[0],
                                 result[73] + face_bbox[1]))
                            self.hand_points.append(
                                (result[78] + face_bbox[0],
                                 result[79] + face_bbox[1]))
                            self.hand_points.append(
                                (result[84] + face_bbox[0],
                                 result[85] + face_bbox[1]))
                            self.hand_points.append(
                                (result[90] + face_bbox[0],
                                 result[91] + face_bbox[1]))
                            self.hand_points.append(
                                (result[62] + face_bbox[0],
                                 result[63] + face_bbox[1]))
                            self.hand_points.append(
                                (result[70] + face_bbox[0],
                                 result[71] + face_bbox[1]))
                            self.hand_points.append(
                                (result[96] + face_bbox[0],
                                 result[97] + face_bbox[1]))
                            self.hand_points.append(
                                (result[108] + face_bbox[0],
                                 result[109] + face_bbox[1]))
                            self.hand_points.append(
                                (result[114] + face_bbox[0],
                                 result[115] + face_bbox[1]))
                            self.hand_points.append(
                                (result[16] + face_bbox[0],
                                 result[17] + face_bbox[1]))
                            for point in self.hand_points:
                                cv2.circle(
                                    self.debug_frame,
                                    (point[0], point[1]),
                                    2,
                                    (255, 0, 0),
                                    thickness=1,
                                    lineType=8,
                                    shift=0,
                                )
                            reprojectdst, _, pitch, yaw, roll = get_head_pose(
                                np.array(self.hand_points))
                            """
                            pitch > 0 Head down, < 0 look up
                            yaw > 0 Turn right < 0 Turn left
                            roll > 0 Tilt right, < 0 Tilt left
                            """
                            cv2.putText(
                                self.debug_frame,
                                "pitch:{:.2f}, yaw:{:.2f}, roll:{:.2f}".format(
                                    pitch, yaw, roll),
                                (face_bbox[0] - 30, face_bbox[1] - 30),
                                cv2.FONT_HERSHEY_COMPLEX,
                                0.45,
                                (255, 0, 0),
                            )

                            hand_attitude = np.array(
                                [abs(pitch), abs(yaw),
                                 abs(roll)])
                            max_index = np.argmax(hand_attitude)
                            if max_index == 0:
                                if pitch > 0:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Head down",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                                else:
                                    cv2.putText(
                                        self.debug_frame,
                                        "look up",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                            elif max_index == 1:
                                if yaw > 0:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Turn right",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                                else:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Turn left",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                            elif max_index == 2:
                                if roll > 0:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Tilt right",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                                else:
                                    cv2.putText(
                                        self.debug_frame,
                                        "Tilt left",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX,
                                        0.5,
                                        (235, 10, 10),
                                    )
                            # Draw a cube with 12 axes
                            line_pairs = [
                                [0, 1],
                                [1, 2],
                                [2, 3],
                                [3, 0],
                                [4, 5],
                                [5, 6],
                                [6, 7],
                                [7, 4],
                                [0, 4],
                                [1, 5],
                                [2, 6],
                                [3, 7],
                            ]
                            for start, end in line_pairs:
                                cv2.line(
                                    self.debug_frame,
                                    reprojectdst[start],
                                    reprojectdst[end],
                                    (0, 0, 255),
                                )
                    except Exception:
                        pass
                if camera:
                    cv2.imshow("Camera view", self.debug_frame)
                else:
                    aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
                    cv2.imshow(
                        "Video view",
                        cv2.resize(self.debug_frame,
                                   (int(900), int(900 / aspect_ratio))),
                    )
                if cv2.waitKey(1) == ord("q"):
                    cv2.destroyAllWindows()
                    break

        self.fps_handler.printStatus()
        if not camera:
            self.cap.release()
        cv2.destroyAllWindows()
        self.running = False
        for thread in self.threads:
            thread.join(2)
            if thread.is_alive():
                break
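frame_norm and to_planar (sketched after Example #1) follow the usual DepthAI example helpers. frame_norm maps a bounding box given in normalized [0, 1] coordinates onto pixel coordinates of the current frame; a sketch consistent with how it is used above:

import numpy as np


def frame_norm(frame, bbox):
    # Width for x values, height for y values; clip boxes that spill outside the frame.
    norm_vals = np.full(len(bbox), frame.shape[0])
    norm_vals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * norm_vals).astype(int)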
Example #4
class DepthAI:
    distance_guardian_class = DistanceGuardianDebug
    distance_bird_class = Bird

    def __init__(self):
        self.frame_size = (672, 384)
        self.create_pipeline()
        self.start_pipeline()
        self.fps_handler = FPSHandler()
        self.distance_guardian = self.distance_guardian_class()
        self.distance_bird = self.distance_bird_class(self.frame_size)

    def create_pipeline(self):
        self.pipeline = dai.Pipeline()
        cam = self.pipeline.createColorCamera()
        cam.setResolution(
            dai.ColorCameraProperties.SensorResolution.THE_1080_P)
        cam.setPreviewSize(self.frame_size[0], self.frame_size[1])
        cam.setInterleaved(False)
        cam.setBoardSocket(dai.CameraBoardSocket.RGB)
        self.cam_xout = self.pipeline.createXLinkOut()
        self.cam_xout.setStreamName("cam_out")
        self.create_models(
            "models/person-detection-retail-0013/pedestrian-detection-adas-0002.blob",
            "model", cam)
        self.model.passthrough.link(self.cam_xout.input)

        self.StereoDepthXLink()
        self.stereo.depth.link(self.model.inputDepth)
        self.model.passthroughDepth.link(self.stereo_out.input)

    def create_models(self, model_path, name, cam=None):
        print(f"正在创建{model_path}神经网络...")
        self.model = self.pipeline.createMobileNetSpatialDetectionNetwork()
        self.model.setBlobPath(str(Path(model_path).resolve().absolute()))
        self.model.setConfidenceThreshold(0.5)
        self.model.input.setBlocking(False)
        self.model.setBoundingBoxScaleFactor(0.5)
        self.model.setDepthLowerThreshold(100)
        self.model.setDepthUpperThreshold(5000)
        cam.preview.link(self.model.input)
        self.model_out = self.pipeline.createXLinkOut()
        self.model_out.setStreamName(f"{name}_nn")
        self.model.out.link(self.model_out.input)

    def StereoDepthXLink(self):
        print(f"正在创建深度节点...")
        self.stereo = self.pipeline.createStereoDepth()
        self.stereo.initialConfig.setConfidenceThreshold(230)
        self.stereo_out = self.pipeline.createXLinkOut()
        self.stereo_out.setStreamName("depth")

        mono_left = self.pipeline.createMonoCamera()
        mono_left.setResolution(
            dai.MonoCameraProperties.SensorResolution.THE_400_P)
        mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
        mono_right = self.pipeline.createMonoCamera()
        mono_right.setResolution(
            dai.MonoCameraProperties.SensorResolution.THE_400_P)
        mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
        mono_left.out.link(self.stereo.left)
        mono_right.out.link(self.stereo.right)

    def start_pipeline(self):
        print(f"启动设备管道...")
        self.device = dai.Device(self.pipeline)
        self.camRgb = self.device.getOutputQueue(self.cam_xout.getStreamName(),
                                                 4, False)
        self.person_nn = self.device.getOutputQueue(
            self.model_out.getStreamName(), 4, False)
        self.depth = self.device.getOutputQueue(
            self.stereo_out.getStreamName(), 4, False)

    def draw_bbox(self, bbox, color, detection):
        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      color, 2)
        cv2.putText(
            self.debug_frame,
            "x: {}m".format(round(detection.spatialCoordinates.x) / 1000),
            (bbox[0], bbox[1] + 30), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.putText(
            self.debug_frame,
            "y: {}m".format(round(detection.spatialCoordinates.y) / 1000),
            (bbox[0], bbox[1] + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.putText(
            self.debug_frame,
            "z: {}m".format(round(detection.spatialCoordinates.z) / 1000),
            (bbox[0], bbox[1] + 70), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.putText(self.debug_frame,
                    "conf: {}".format(round(detection.confidence * 100)),
                    (bbox[0], bbox[1] + 90), cv2.FONT_HERSHEY_TRIPLEX, 0.4,
                    color)

    def run_model(self):
        width = self.frame.shape[1]
        height = self.frame.shape[0]
        nn_data = self.person_nn.get()
        if nn_data is not None:
            boxes = []
            detections = nn_data.detections
            for detection in detections:
                x_min = int(detection.xmin * width)
                y_min = int(detection.ymin * height)
                x_max = int(detection.xmax * width)
                y_max = int(detection.ymax * height)
                bbox = (x_min, y_min, x_max, y_max)
                self.draw_bbox(bbox, (10, 245, 10), detection)
                boxes.append({
                    "id": uuid.uuid4(),
                    "x_min": x_min,
                    "y_min": y_min,
                    "x_max": x_max,
                    "y_max": y_max,
                    "depth_x": detection.spatialCoordinates.x / 1000,
                    "depth_y": detection.spatialCoordinates.y / 1000,
                    "depth_z": detection.spatialCoordinates.z / 1000
                })
            results = self.distance_guardian.parse_frame(
                self.debug_frame, boxes)
            self.bird_frame = self.distance_bird.parse_frame(
                self.debug_frame, results, boxes)

            depthFrame = self.depth.get().getFrame()
            self.depthFrameColor = cv2.normalize(depthFrame, None, 255, 0,
                                                 cv2.NORM_INF, cv2.CV_8UC1)
            self.depthFrameColor = cv2.equalizeHist(self.depthFrameColor)
            self.depthFrameColor = cv2.applyColorMap(self.depthFrameColor,
                                                     cv2.COLORMAP_JET)

    def parse(self):
        if debug:
            self.debug_frame = self.frame.copy()
        self.run_model()
        self.fps_handler.tick("NN")
        if debug:
            numpy_horizontal = np.hstack((self.debug_frame, self.bird_frame))
            cv2.imshow("Frame", numpy_horizontal)
            cv2.imshow("depth", self.depthFrameColor)
            if cv2.waitKey(1) == ord('q'):
                cv2.destroyAllWindows()
                raise StopIteration()

    def run_camera(self):
        while True:
            rgb_in = self.camRgb.get()
            self.frame = rgb_in.getCvFrame()
            try:
                self.parse()
            except StopIteration:
                break

    def run(self):
        self.run_camera()
        self.fps_handler.printStatus()

    def __del__(self):
        del self.pipeline
        del self.device
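The class above is driven entirely from run(); debug is assumed to be a module-level flag, and DistanceGuardianDebug and Bird are helper classes defined elsewhere in the project. A typical entry point would look like this:

if __name__ == "__main__":
    # Build the pipeline, open the device, and stream until 'q' is pressed.
    app = DepthAI()
    app.run()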
Example #5
def vehicle(source, video_path, output, fps, frame_size):
    """
    Vehicle attribute recognition and license plate recognition
    """
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            vehicle_in = device.getInputQueue("vehicle_in")
        else:
            cam_out = device.getOutputQueue("rgb")

        vehicle_nn = device.getOutputQueue("vehicle_nn")

        attr_in = device.getInputQueue("attr_in")
        attr_nn = device.getOutputQueue("attr_nn")
        license_in = device.getInputQueue("license_in")
        license_nn = device.getOutputQueue("license_nn")

        colors = ["white", "gray", "yellow", "red", "green", "blue", "black"]
        types = ["car", "bus", "truck", "van"]

        license_dict = [
            *map(chr, range(48, 58)),
            "<Anhui>",
            "<Beijing>",
            "<Chongqing>",
            "<Fujian>",
            "<Gansu>",
            "<Guangdong>",
            "<Guangxi>",
            "<Guizhou>",
            "<Hainan>",
            "<Hebei>",
            "<Heilongjiang>",
            "<Henan>",
            "<HongKong>",
            "<Hubei>",
            "<Hunan>",
            "<InnerMongolia>",
            "<Jiangsu>",
            "<Jiangxi>",
            "<Jilin>",
            "<Liaoning>",
            "<Macau>",
            "<Ningxia>",
            "<Qinghai>",
            "<Shaanxi>",
            "<Shandong>",
            "<Shanghai>",
            "<Shanxi>",
            "<Sichuan>",
            "<Tianjin>",
            "<Tibet>",
            "<Xinjiang>",
            "<Yunnan>",
            "<Zhejiang>",
            "<police>",
            *map(chr, range(65, 91)),
        ]

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, vehicle_in, 300, 300)
            vehicle_data = vehicle_nn.get().detections
            fps_handler.tick("vehicle")

            for bbox in vehicle_data:
                if bbox.label == 1:
                    vehicle_coord = frameNorm(
                        frame_debug,
                        [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])

                    cv2.rectangle(frame_debug, vehicle_coord[:2],
                                  vehicle_coord[2:], (128, 128, 0))

                    vehicle_frame = frame[vehicle_coord[1]:vehicle_coord[3],
                                          vehicle_coord[0]:vehicle_coord[2], ]
                    run_nn(vehicle_frame, attr_in, 72, 72)
                    attr_data = toTensorResult(attr_nn.get())
                    color_ = colors[attr_data.get("color").argmax()]
                    type_ = types[attr_data.get("type").argmax()]
                    drawText(
                        frame_debug,
                        color_,
                        (vehicle_coord[0] + 10, vehicle_coord[1] + 10),
                    )
                    drawText(
                        frame_debug,
                        type_,
                        (vehicle_coord[0] + 10, vehicle_coord[1] + 25),
                    )
                elif bbox.label == 2:
                    plate_coord = frameNorm(
                        frame_debug,
                        [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    cv2.rectangle(frame_debug, plate_coord[:2],
                                  plate_coord[2:], (128, 128, 0))

                    plate_frame = frame[plate_coord[1]:plate_coord[3],
                                        plate_coord[0]:plate_coord[2], ]
                    plate_frame = pad_resize(plate_frame, (24, 94))

                    # cv2.imshow("pl",plate_frame.astype(np.uint8))
                    run_nn(plate_frame, license_in, 94, 24)
                    license_data = (toTensorResult(
                        license_nn.get()).get("d_predictions.0").squeeze())
                    plate_str = ""
                    for j in license_data:
                        if j == -1:
                            break
                        plate_str += license_dict[j]
                    drawText(
                        frame_debug,
                        plate_str,
                        (plate_coord[0] - 10, plate_coord[1] - 10),
                    )
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
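pad_resize letterboxes the plate crop to the 24x94 input expected by the license-plate recognition network. One plausible sketch, assuming zero padding anchored at the top-left corner:

import cv2
import numpy as np


def pad_resize(frame, target_shape):
    # Resize while preserving aspect ratio, then zero-pad to (height, width).
    th, tw = target_shape
    h, w = frame.shape[:2]
    scale = min(th / h, tw / w)
    nh, nw = max(int(h * scale), 1), max(int(w * scale), 1)
    resized = cv2.resize(frame, (nw, nh))
    out = np.zeros((th, tw, 3), dtype=resized.dtype)
    out[:nh, :nw] = resized
    return out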
Example #6
def yolox(source, model_name, model_size, video_path, output, fps, frame_size):
    """
    This function is used to detect objects in a video

    :param model_name: The name of the model to use
    :param model_size: Size of the model
    :param video_path: Path to the video file
    :param output: The output file name
    :param fps: The FPS (frames per second) of the output video
    :param frame_size: The size of the frame to be saving
    """
    """
    基于 yolox 的目标检测器
    """
    model_w = model_size[0]
    model_h = model_size[1]
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source, model_name, model_w, model_h), device_info) as device:
        print("Starting pipeline...")
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            yolox_det_in = device.getInputQueue("yolox_det_in")
        else:
            cam_out = device.getOutputQueue("cam_out", 1, True)
        yolox_det_nn = device.getOutputQueue("yolox_det_nn")

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            """
            Get the current frame from the camera and return it
            """
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            fps_handler.tick("Frame")
            if not read_correctly:
                break

            frame_debug = frame.copy()
            if source:
                run_nn(yolox_det_in, to_planar(frame, (model_h, model_w)), model_w, model_h)
            yolox_det_data = yolox_det_nn.get()

            res = toTensorResult(yolox_det_data).get("output")
            fps_handler.tick("nn")
            predictions = demo_postprocess(res, (model_h, model_w), p6=False)[0]

            boxes = predictions[:, :4]
            scores = predictions[:, 4, None] * predictions[:, 5:]

            boxes_xyxy = np.ones_like(boxes)
            boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
            boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
            boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
            boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0

            input_shape = np.array([model_h, model_w])
            min_r = (input_shape / frame.shape[:2]).min()
            offset = (np.array(frame.shape[:2]) * min_r - input_shape) / 2
            offset = np.ravel([offset, offset])
            boxes_xyxy = (boxes_xyxy + offset[::-1]) / min_r

            dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.2)

            if dets is not None:
                final_boxes = dets[:, :4]
                final_scores, final_cls_inds = dets[:, 4], dets[:, 5]
                frame_debug = vis(
                    frame_debug,
                    final_boxes,
                    final_scores,
                    final_cls_inds,
                    conf=0.5,
                    class_names=LABELS.get(model_name),
                )
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" % time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
    cv2.destroyAllWindows()
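demo_postprocess decodes the raw YOLOX output grid into rows of (cx, cy, w, h, objectness, class scores) before the xywh-to-xyxy conversion above. The version below closely follows the reference helper from the YOLOX repository; treat it as a sketch rather than the exact code used in this project:

import numpy as np


def demo_postprocess(outputs, img_size, p6=False):
    # Rebuild the per-stride grids and map raw predictions back to input-image pixels.
    grids, expanded_strides = [], []
    strides = [8, 16, 32] if not p6 else [8, 16, 32, 64]
    hsizes = [img_size[0] // s for s in strides]
    wsizes = [img_size[1] // s for s in strides]
    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)
        expanded_strides.append(np.full((*grid.shape[:2], 1), stride))
    grids = np.concatenate(grids, 1)
    expanded_strides = np.concatenate(expanded_strides, 1)
    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides    # center x, y
    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides    # width, height
    return outputs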
Example #7
                    break

                nm.sendInputFrame(frame)
                fps.tick('host')

                if debug:
                    show(frame)
                    cv2.putText(frame,
                                f"RGB FPS: {round(fps.tickFps('host'), 1)}",
                                (5, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (0, 255, 0))
                    cv2.putText(frame,
                                f"NN FPS:  {round(fps.tickFps('nn'), 1)}",
                                (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (0, 255, 0))
                    cv2.imshow("rgb", frame)

            key = cv2.waitKey(1)
            if key == ord('q'):
                break

    except KeyboardInterrupt:
        pass

    running = False

t.join()
fps.printStatus()
if not args.camera:
    cap.release()
Example #8
def palm(source, video_path, output, fps, frame_size):
    """
    Palm detection, used to control the mouse cursor
    """
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            palm_in = device.getInputQueue("palm_in")
        else:
            cam_out = device.getOutputQueue("rgb")

        palm_nn = device.getOutputQueue("palm_nn")

        dots = []

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, palm_in, 128, 128)
            results = toTensorResult(palm_nn.get())
            fps_handler.tick("palm")

            num_keypoints = 7
            min_score_thresh = 0.7
            anchors = np.load("anchors_palm.npy")

            raw_box_tensor = results.get("regressors")  # regress
            raw_score_tensor = results.get("classificators")  # classification
            detections = raw_to_detections(raw_box_tensor, raw_score_tensor,
                                           anchors, (128, 128), num_keypoints)

            palm_coords = [
                frameNorm(frame, obj[:4]) for det in detections for obj in det
                if obj[-1] > min_score_thresh
            ]

            palm_confs = [
                obj[-1] for det in detections for obj in det
                if obj[-1] > min_score_thresh
            ]

            if len(palm_coords) > 0:
                palm_coords = non_max_suppression(
                    boxes=np.concatenate(palm_coords).reshape(-1, 4),
                    probs=palm_confs,
                    overlapThresh=0.1,
                )

                for bbox in palm_coords:
                    cv2.rectangle(frame_debug, bbox[:2], bbox[2:],
                                  (10, 245, 10))
                    dot_x = (bbox[2] + bbox[0]) / 2
                    dot_y = (bbox[3] + bbox[1]) / 2
                    dots = move_mouse(dots, (dot_x, dot_y), frame.shape[:2])
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
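move_mouse keeps a short history of palm centers and drives the system cursor from their smoothed position. A hypothetical sketch using pyautogui; the smoothing window and the frame-to-screen mapping are assumptions, not the project's actual implementation:

import pyautogui


def move_mouse(dots, point, frame_shape, window=5):
    # Append the new palm center, keep the last few, and move the cursor to the
    # mean position mapped from frame coordinates to screen coordinates.
    dots.append(point)
    dots = dots[-window:]
    mean_x = sum(d[0] for d in dots) / len(dots)
    mean_y = sum(d[1] for d in dots) / len(dots)
    frame_h, frame_w = frame_shape
    screen_w, screen_h = pyautogui.size()
    pyautogui.moveTo(mean_x / frame_w * screen_w, mean_y / frame_h * screen_h)
    return dots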
Example #9
def facial_info(source, video_path, output, fps, frame_size):
    """
    Facial information recognition (age, gender, emotion)
    """
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        print("Starting pipeline...")
        # device.startPipeline()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            face_in = device.getInputQueue("face_in")
        else:
            cam_out = device.getOutputQueue("cam_out", 1, True)
        face_nn = device.getOutputQueue("face_nn")
        head_pose_in = device.getInputQueue("head_pose_in")
        head_pose_nn = device.getOutputQueue("head_pose_nn")
        age_in = device.getInputQueue("age_in")
        age_nn = device.getOutputQueue("age_nn")
        emo_in = device.getInputQueue("emo_in")
        emo_nn = device.getOutputQueue("emo_nn")

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        fps_handler = FPSHandler()

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(face_in, frame, 300, 300)
            face_nn_data = face_nn.get()
            fps_handler.tick("All")
            if face_nn_data is not None:
                bboxes = face_nn_data.detections

                for bbox in bboxes:
                    face_coord = frameNorm(
                        frame_debug,
                        [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    face_frame = frame[face_coord[1]:face_coord[3],
                                       face_coord[0]:face_coord[2], ]
                    cv2.rectangle(
                        frame_debug,
                        (face_coord[0], face_coord[1]),
                        (face_coord[2], face_coord[3]),
                        (0, 0, 0),
                    )

                    run_nn(head_pose_in, face_frame, 60, 60)
                    roll_degree = toTensorResult(
                        head_pose_nn.get()).get("angle_r_fc")[0][0]
                    center = (
                        (face_coord[2] + face_coord[0]) / 2,
                        (face_coord[3] + face_coord[1]) / 2,
                    )
                    size = (
                        (face_coord[2] - face_coord[0]),
                        (face_coord[3] - face_coord[1]),
                    )
                    face_frame_corr = rotate_frame(
                        frame,
                        center,
                        size,
                        roll_degree,
                    )
                    cv2.imshow("face_frame_corr", face_frame_corr)

                    run_nn(age_in, face_frame_corr, 62, 62)
                    age_gender = toTensorResult(age_nn.get())
                    age = age_gender.get("age_conv3").squeeze() * 100
                    # 0 - female, 1 - male
                    gender = "Male" if age_gender.get(
                        "prob").argmax() else "Female"
                    drawText(
                        frame_debug,
                        f"Age: {age:0.0f}",
                        (face_coord[0] + 10, face_coord[1] + 30),
                        color="greenyellow",
                    )
                    drawText(
                        frame_debug,
                        f"Gender: {gender}",
                        (face_coord[0] + 10, face_coord[1] + 50),
                        "greenyellow",
                    )

                    run_nn(emo_in, face_frame_corr, 64, 64)
                    # 0 - 'neutral', 1 - 'happy', 2 - 'sad', 3 - 'surprise', 4 - 'anger'
                    emo = ["neutral", "happy", "sad", "surprise", "anger"]
                    emo = emo[toTensorResult(
                        emo_nn.get()).get("prob_emotion").argmax()]
                    drawText(
                        frame_debug,
                        f"emo: {emo}",
                        (face_coord[0] + 10, face_coord[1] + 70),
                        "greenyellow",
                    )

            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            cv2.imshow("debug", frame_debug)
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        if source:
            cap.release()
        if output:
            writer.release()
        fps_handler.printStatus()
        cv2.destroyAllWindows()
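rotate_frame compensates for the head roll reported by the head-pose network so that the crops passed to the age/gender and emotion networks are roughly upright. A minimal sketch, assuming center is (x, y) and size is (width, height) as computed above:

import cv2


def rotate_frame(frame, center, size, degree):
    # Rotate the full frame around the face center by the roll angle, then crop
    # the axis-aligned face region from the rotated image.
    rot_mat = cv2.getRotationMatrix2D(center, degree, 1.0)
    rotated = cv2.warpAffine(frame, rot_mat, (frame.shape[1], frame.shape[0]))
    return cv2.getRectSubPix(rotated, (int(size[0]), int(size[1])), center)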