def __init__(self,
                 videoPath = '/dev/video0',
                 videoW = 1024,
                 videoH = 768,
                 fontScale = 1.0,
                 verbose = True):

        self.verbose = verbose
        self._debug = False

        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        logging.info('===============================================================')
        logging.info('Initializing Video Processor with the following parameters:')
        logging.info('   - OpenCV Version     : {}'.format(cv2.__version__))
        logging.info('   - Device Path        : {}'.format(videoPath))
        logging.info('   - Frame Size         : {}x{}'.format(videoW, videoH))
        logging.info('===============================================================')

        # To send message to clients (Browser)
        self.imageStreamHandler = None
        self.threadExecutor = None

        # Video source
        self.videoData = Video_Data(self, videoPath)
        self.displayFrame = np.array([])
        self.frame_org = np.array([])

        # for Frame Rate measurement
        self.fps = FPS()

        playback_mode = self.videoData.get_playback_mode()
        self._playback_sync = (playback_mode == Video_Playback_Mode.Sync)
        self._fps_target = 30
        self._fps_wait = 1000.0/30
        self.currentFps = 30.0

        # For display
        self._fontScale = float(fontScale)
        self._annotate = False

        # Track states of this object
        self.set_video_processor_state(VideoProcessorState.Unknown)

        # OpenVINO
        self.inference_engine = None
        self.runInference = 0

        self.ioLoop = None
        self.current_model_data = None
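
Note: the FPS helper constructed above (and used throughout the examples below) is not defined on this page. Most snippets call it like imutils.video.FPS; a few use a richer variant (mean_nb_frames, display). A minimal sketch of the common surface, as an assumption rather than the exact class these snippets import:

import datetime

class FPS:
    # Sketch of an imutils.video.FPS-style counter (assumed interface).
    def __init__(self):
        self._start = None   # start timestamp
        self._end = None     # stop timestamp
        self._numFrames = 0  # frames counted between start() and stop()

    def start(self):
        self._start = datetime.datetime.now()
        return self

    def stop(self):
        self._end = datetime.datetime.now()

    def update(self):
        self._numFrames += 1

    def elapsed(self):
        return (self._end - self._start).total_seconds()

    def fps(self):
        return self._numFrames / self.elapsed()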
Example #2
    def run(self):
        global RECTS_ARR  # contains current bounding rectangles
        global fpsProc  # fpsProc counter
        fpsProc = FPS().start()

        while not program_state.STOP_PROGRAM:
            if program_state.RUN_MODE:
                lane = self.lane
                fpsProc.update()  # update fpsProc counter

                IMG = CAPTURE.read()  # Read frame from camera
                GRAY = cv2.cvtColor(
                    IMG, cv2.COLOR_BGR2GRAY)  # Turn image to Grayscale
                _, THRESHOLD_IMG = cv2.threshold(
                    GRAY, handle_config.WHITE_THRESH, 255,
                    0)  # Run threshold on gray image

                THRESH_LANE_IMG = THRESHOLD_IMG[
                    handle_config.LANE_HEIGHT_START:handle_config.
                    LANE_HEIGHT_END, handle_config.
                    LANE_WIDTH_START[lane]:handle_config.LANE_WIDTH_END[lane]]

                # run opencv find contours, only external boxes
                _, CONTOURS, _ = cv2.findContours(THRESH_LANE_IMG,
                                                  cv2.RETR_EXTERNAL,
                                                  cv2.CHAIN_APPROX_SIMPLE)
                if CONTOURS:
                    try:
                        contour = max(
                            CONTOURS,
                            key=cv2.contourArea)  # find the biggest area
                    except Exception as e:
                        info_logger.lane_error(lane, CONTOURS, e)
                        continue  # contour is undefined past this point

                    rect = cv2.minAreaRect(contour)
                    box = np.int64(cv2.boxPoints(rect))

                    start_pos = min([position[1] for position in box])

                    # if area big enough
                    # if contour within top bound
                    if cv2.contourArea(contour) > handle_config.MIN_AREA and \
                        start_pos > handle_config.EDGE_GAP:

                        for position in box:
                            position[0] += handle_config.LANE_WIDTH_START[lane]
                            position[1] += handle_config.LANE_HEIGHT_START

                        RECTS_ARR[lane] = rect
                        BOX_ARR[lane] = box
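
The measurement step above (threshold, largest external contour, rotated bounding box) can be exercised on a synthetic image. A self-contained sketch, independent of the RECTS_ARR/BOX_ARR globals and lane config:

import cv2
import numpy as np

# Synthetic frame: one white rotated rectangle on black
canvas = np.zeros((200, 300), dtype=np.uint8)
pts = cv2.boxPoints(((150, 100), (120, 40), 30))  # (center, size, angle)
cv2.fillPoly(canvas, [pts.astype(np.int32)], 255)

_, thresh = cv2.threshold(canvas, 127, 255, 0)
# [-2] picks the contour list under both the OpenCV 3 and 4 return signatures
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
contour = max(contours, key=cv2.contourArea)  # biggest area, as above
rect = cv2.minAreaRect(contour)               # ((cx, cy), (w, h), angle)
box = np.int64(cv2.boxPoints(rect))           # the 4 corner points
print(rect, box)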
Example #3
def main():
    """
    Main starting point for the program

    :return: N/A
    """
    Graphics.init()
    FPS.init()

    drawer = Drawer()

    key_down_listeners = [drawer.on_key_down]

    mouse_down_listeners = [drawer.on_mouse_down]

    mouse_up_listeners = [drawer.on_mouse_up]

    while True:

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    sys.exit()
                if event.key == pygame.K_F10:
                    Graphics.toggle_fullscreen()

                for delegate in key_down_listeners:
                    delegate(event.key, event.mod)

            if event.type == pygame.MOUSEBUTTONDOWN or event.type == pygame.MOUSEBUTTONUP:
                mouse_position = (event.pos[0] / Graphics.get_scale()[0],
                                  event.pos[1] / Graphics.get_scale()[1])

                if event.type == pygame.MOUSEBUTTONDOWN:
                    for delegate in mouse_down_listeners:
                        delegate(mouse_position, event.button)

                if event.type == pygame.MOUSEBUTTONUP:
                    for delegate in mouse_up_listeners:
                        delegate(mouse_position, event.button)

        drawer.update()

        drawer.draw()

        FPS.update()
        Graphics.flip()
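
Graphics and FPS here are static wrapper classes from the surrounding project. A minimal sketch of the FPS side, assuming it wraps pygame.time.Clock; the delta_time() accessor is the one consumed by the update() method in a later example:

import pygame

class FPS:
    # Sketch of a static clock wrapper (assumed from the calls
    # FPS.init(), FPS.update(), FPS.delta_time()).
    _clock = None
    _delta_ms = 0

    @classmethod
    def init(cls):
        cls._clock = pygame.time.Clock()

    @classmethod
    def update(cls, target_fps=60):
        # tick() caps the frame rate and returns the elapsed milliseconds
        cls._delta_ms = cls._clock.tick(target_fps)

    @classmethod
    def delta_time(cls):
        return cls._delta_ms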
Example #4
    def __init__(self):
        self.prev_flight_data = None
        self.record = False
        self.tracking = False
        self.keydown = False
        self.date_fmt = '%Y-%m-%d_%H%M%S'
        self.speed = 50
        self.prev_tracking="tr_off"
        self.drone = tellopy.Tello()
        self.init_drone()
        self.init_controls()

        # container for processing the packets into frames
        self.container = av.open(self.drone.get_video_stream())
        self.vid_stream = self.container.streams.video[0]
        self.out_stream_writer = None
        self.out_file = None
        self.out_stream = None
        self.out_name = None
        self.start_time = time.time()
        self.use_openpose = False

        self.op = OP(number_people_max=5, min_size=10, debug=None)
        self.fps = FPS()
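
A hypothetical frame loop consistent with the container opened above: PyAV decodes the Tello H.264 stream into frames OpenCV can display. The function name and window title are illustrative only:

import cv2
import numpy as np

def frame_loop(handler):
    # handler is an instance of the class above
    for frame in handler.container.decode(video=0):
        # PyAV frame -> PIL image -> BGR ndarray for OpenCV
        img = cv2.cvtColor(np.array(frame.to_image()), cv2.COLOR_RGB2BGR)
        handler.fps.update()
        cv2.imshow('tello', img)
        if cv2.waitKey(1) == 27:  # ESC
            break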
Example #5
    def __init__(self, client, cam, name, gui):
        """Constructor

        Arguments:
            client {object} -- Client owning the camera and its properties.
            cam {object} -- Camera object the frames are recorded from.
            name {str} -- Name used to tag the recorded files.
            gui {GuiPart} -- GUI object to configure the Graphical user interface of the App.
        """
        self.client = client
        self.cam = cam
        self.name = name
        self.duration = 10
        self.gui = gui
        self.video_handler = cv2.VideoWriter()
        self.record_thread = None
        self.stop_record_thread = threading.Event()
        self.fourcc_codec = cv2.VideoWriter_fourcc(*'MJPG')  #XVID
        self.f = None
        self.fps = FPS()
Example #6
    def __init__(self, arg):
        super(zed_camera, self).__init__()
        #cap = VideoStream(src=0, resolution=(2560,720)).start()
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        self.fps = FPS()

        self.imgUL = None
        self.imgUR = None

        rospy.init_node("stereo_streamer")
        self.imgL_pub = rospy.Publisher("imgL", Image, queue_size=10)
        self.imgR_pub = rospy.Publisher("imgR", Image, queue_size=10)

        self.left_matcher = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=160,  # max_disp has to be divisible by 16, e.g. 192, 256
            blockSize=5,
            disp12MaxDiff=1,
            uniquenessRatio=15,
            speckleWindowSize=0,
            speckleRange=2,
            preFilterCap=63,
            mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)

        self.right_matcher = cv2.ximgproc.createRightMatcher(self.left_matcher)

        self.wls_filter = cv2.ximgproc.createDisparityWLSFilter(
            matcher_left=self.left_matcher)
        self.wls_filter.setLambda(lmbda)
        self.wls_filter.setSigmaColor(sigma)

        self.mapLx, self.mapLy = cv2.initUndistortRectifyMap(
            camera_matrix_L, dist_coeff_L, RL, PL, (2560 // 2, 720),
            cv2.CV_32FC1)

        self.mapRx, self.mapRy = cv2.initUndistortRectifyMap(
            camera_matrix_R, dist_coeff_R, RR, PR, (2560 // 2, 720),
            cv2.CV_32FC1)
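
A sketch of how the matchers and WLS filter initialized above are typically combined per frame. grayL/grayR are assumed rectified grayscale views and the helper name is hypothetical:

import numpy as np

def filtered_disparity(cam, grayL, grayR):
    # The WLS filter needs both the left and the right disparity maps
    dispL = cam.left_matcher.compute(grayL, grayR).astype(np.int16)
    dispR = cam.right_matcher.compute(grayR, grayL).astype(np.int16)
    # Positional args: disparity_map_left, left_view,
    # filtered_disparity_map (out, None here), disparity_map_right
    return cam.wls_filter.filter(dispL, grayL, None, dispR)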
Example #7
    def update(self):
        self._last_update += FPS.delta_time()

        if self._last_update >= self._solve_delay \
                and not self._paused \
                and not self._search_space.path_found:
            try:
                if self._solve_delay == 0:
                    while True:
                        next(self.path_find_generator)
                else:
                    next(self.path_find_generator)

            except StopIteration:
                pass

            self._nodes_surface_dirty = True
            self._font_surface_dirty = True
            self._debug_surface_dirty = True

            self._last_update = 0
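
path_find_generator is assumed to be a generator that advances the search by one expansion per next() call, so update() can animate it step by step (or drain it in one go when _solve_delay is 0). A hypothetical shape, with illustrative names:

def make_path_find_generator(search_space):
    # Each yield hands control back after one expansion step
    while not search_space.path_found:
        search_space.expand_next_node()  # hypothetical helper
        yield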
Example #8
def show_camera():
    # To flip the image, modify the flip_method parameter (0 and 2 are the most common)
    print(gstreamer_pipeline(flip_method=0))
    vs = WebcamVideoStream(src=gstreamer_pipeline()).start()
    #cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0), cv2.CAP_GSTREAMER)
    fps = FPS().start()
    take_snapshot = True

    cv2.namedWindow("CSI Camera", cv2.WINDOW_AUTOSIZE)
    # Loop while the window is open
    while cv2.getWindowProperty("CSI Camera", 0) >= 0:
        original_img = vs.read()
        if original_img is None:
            print("Unable to open camera")
            break
        if take_snapshot:
            save_snapshot(original_img, "original")
            take_snapshot = False
        hsv_bounds = (60, 87, 120, 255, 50, 255)
        img = apply_hsv_filter(original_img, hsv_bounds)
        img = erode(img, 1)
        img = dilate(img, 1)

        targets = find_contours(img)
        brColor = (255, 255, 255)
        for contour in targets:
            rr = cv2.minAreaRect(contour)
            pt = get_goal_center(rr)
            cv2.circle(original_img, pt, 6, brColor, 3)

        fps.update()
        cv2.imshow("CSI Camera", original_img)
        # waitKey also acts as a ~30 ms frame delay
        keyCode = cv2.waitKey(30) & 0xFF
        # Stop the program on the ESC key
        if keyCode == 27:
            break

    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

    vs.stop()
    cv2.destroyAllWindows()
Example #9
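
The parser ap is built before this excerpt begins. A minimal sketch consistent with the args[...] keys used below (flag names assumed):

import argparse

ap = argparse.ArgumentParser()
ap.add_argument("-n", "--num-frames", type=int, default=100,
                help="# of frames to loop over for the FPS test")
ap.add_argument("-d", "--display", type=int, default=-1,
                help="whether or not frames should be displayed")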
args = vars(ap.parse_args())

#init camera and stream
camera = PiCamera()
camera.resolution = (640, 480)
#camera.framerate = 32
rawCapture = PiRGBArray(camera, size=(640, 480))
stream = camera.capture_continuous(rawCapture,
                                   format='bgr',
                                   use_video_port=True)

camera.close()

vs = PiVideoStream(resolution=(640, 480)).start()
time.sleep(2.0)
fps = FPS().start()

while fps._numFrames < args["num_frames"]:
    #grab frame from threaded video stream and resize it
    frame = vs.read()
    frame = imutils.resize(frame, width=640)

    #check to see if frame should be displayed
    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

    #update fps counter
    fps.update()

#stop timer and display FPS info
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
vs.stop()
Example #10
    def run(self):

        self.fps = FPS(mean_nb_frames=20)

        nb_frames = 0
        nb_pd_inferences = 0
        nb_pd_inferences_direct = 0
        nb_lm_inferences = 0
        nb_lm_inferences_after_landmarks_ROI = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0

        get_new_frame = True
        use_previous_landmarks = False

        global_time = time.perf_counter()
        while True:
            if get_new_frame:
                nb_frames += 1
                if self.input_type == "image":
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                if self.crop:
                    # Cropping the long side to get a square shape
                    self.frame_size = min(h, w)
                    dx = (w - self.frame_size) // 2
                    dy = (h - self.frame_size) // 2
                    video_frame = vid_frame[dy:dy + self.frame_size,
                                            dx:dx + self.frame_size]
                else:
                    # Padding on the small side to get a square shape
                    self.frame_size = max(h, w)
                    self.pad_h = int((self.frame_size - h) / 2)
                    self.pad_w = int((self.frame_size - w) / 2)
                    video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h,
                                                     self.pad_h, self.pad_w,
                                                     self.pad_w,
                                                     cv2.BORDER_CONSTANT)

                annotated_frame = video_frame.copy()

            if not self.force_detection and use_previous_landmarks:
                self.regions = regions_from_landmarks
                mpu.detections_to_rect(
                    self.regions, kp_pair=[0, 1]
                )  # self.regions.pd_kps are initialized from landmarks on previous frame
                mpu.rect_transformation(self.regions, self.frame_size,
                                        self.frame_size)
            else:
                # Infer pose detection
                # Resize image to NN square input shape
                frame_nn = cv2.resize(video_frame, (self.pd_w, self.pd_h),
                                      interpolation=cv2.INTER_AREA)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]

                pd_rtrip_time = now()
                inference = self.pd_exec_net.infer(
                    inputs={self.pd_input_blob: frame_nn})
                glob_pd_rtrip_time += now() - pd_rtrip_time
                self.pd_postprocess(inference)
                self.pd_render(annotated_frame)
                nb_pd_inferences += 1
                if get_new_frame: nb_pd_inferences_direct += 1

            # Landmarks
            self.nb_active_regions = 0
            if self.show_3d:
                self.vis3d.clear_geometries()
                self.vis3d.add_geometry(self.grid_floor,
                                        reset_bounding_box=False)
                self.vis3d.add_geometry(self.grid_wall,
                                        reset_bounding_box=False)
            if self.force_detection:
                for r in self.regions:
                    frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                                 self.lm_w, self.lm_h)
                    # Transpose hxwx3 -> 1x3xhxw
                    frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                    # Get landmarks
                    lm_rtrip_time = now()
                    inference = self.lm_exec_net.infer(
                        inputs={self.lm_input_blob: frame_nn})
                    glob_lm_rtrip_time += now() - lm_rtrip_time
                    nb_lm_inferences += 1
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
            elif len(self.regions) == 1:
                r = self.regions[0]
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_w, self.lm_h)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                # Get landmarks
                lm_rtrip_time = now()
                inference = self.lm_exec_net.infer(
                    inputs={self.lm_input_blob: frame_nn})
                glob_lm_rtrip_time += now() - lm_rtrip_time
                nb_lm_inferences += 1
                if use_previous_landmarks:
                    nb_lm_inferences_after_landmarks_ROI += 1

                self.lm_postprocess(r, inference)
                if not self.force_detection:
                    if get_new_frame:
                        if not use_previous_landmarks:
                            # With a new frame, we have run the landmark NN on a ROI found by the detection NN...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks
                                # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                                regions_from_landmarks = [
                                    mpu.Region(pd_kps=r.landmarks_padded[
                                        self.nb_lms - 2:self.nb_lms, :2] /
                                               self.frame_size)
                                ]
                                use_previous_landmarks = True
                        else:
                            # With a new frame, we have run the landmark NN on a ROI calculated from the landmarks of the previous frame...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks
                                # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                                regions_from_landmarks = [
                                    mpu.Region(pd_kps=r.landmarks_padded[
                                        self.nb_lms - 2:self.nb_lms, :2] /
                                               self.frame_size)
                                ]
                                use_previous_landmarks = True
                            else:
                                # ...and could not find a body
                                # We don't know if it is because the ROI calculated from the previous frame is not reliable (the body moved)
                                # or because there is really no body in the frame. To decide, we have to run the detection NN on this frame
                                get_new_frame = False
                                use_previous_landmarks = False
                                continue
                    else:
                        # On a frame on which we already ran the landmark NN without finding a body,
                        # we have run the detection NN...
                        if r.lm_score > self.lm_score_threshold:
                            # ...and successfully found a body and its landmarks
                            # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                            regions_from_landmarks = [
                                mpu.Region(pd_kps=r.landmarks_padded[
                                    self.nb_lms - 2:self.nb_lms, :2] /
                                           self.frame_size)
                            ]
                            use_previous_landmarks = True
                        # else:
                        # ...and could not find a body
                        # We are sure there is no body in that frame

                        get_new_frame = True
                self.lm_render(annotated_frame, r)
            else:
                # Detection NN hasn't found any body
                get_new_frame = True

            self.fps.update()

            if self.show_3d:
                self.vis3d.poll_events()
                self.vis3d.update_renderer()
            if self.smoothing and self.nb_active_regions == 0:
                self.filter.reset()

            if not self.crop:
                annotated_frame = annotated_frame[self.pad_h:self.pad_h + h,
                                                  self.pad_w:self.pad_w + w]

            if self.show_fps:
                self.fps.display(annotated_frame,
                                 orig=(50, 50),
                                 size=1,
                                 color=(240, 180, 100))
            cv2.imshow("Blazepose", annotated_frame)

            if self.output:
                self.output.write(annotated_frame)

            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_scores = not self.show_scores
            elif key == ord('6'):
                self.show_gesture = not self.show_gesture
            elif key == ord('f'):
                self.show_fps = not self.show_fps
            elif key == ord('s'):
                self.show_segmentation = not self.show_segmentation

        # Print some stats
        print(
            f"FPS : {nb_frames/(time.perf_counter() - global_time):.1f} f/s (# frames = {nb_frames})"
        )
        print(
            f"# pose detection inferences : {nb_pd_inferences} - # direct: {nb_pd_inferences_direct} - # after landmarks ROI failures: {nb_pd_inferences-nb_pd_inferences_direct}"
        )
        print(
            f"# landmark inferences       : {nb_lm_inferences} - # after pose detection: {nb_lm_inferences - nb_lm_inferences_after_landmarks_ROI} - # after landmarks ROI prediction: {nb_lm_inferences_after_landmarks_ROI}"
        )
        print(
            f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms"
        )
        if nb_lm_inferences:
            print(
                f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms"
            )

        if self.output:
            self.output.release()
Example #11
cam_infoL.P = PL

cam_infoR = CameraInfo()
cam_infoR.height = CAMERA_HEIGHT
cam_infoR.width = CAMERA_WIDTH / 2
cam_infoR.distortion_model = "plumb_bob"
cam_infoR.K = KR
cam_infoR.D = DR
cam_infoR.R = RR
cam_infoR.P = PR

cap = cv2.VideoCapture(2)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)

fps = FPS()

while True:
    _, frame = cap.read()

    if frame is not None:
        height, width, channels = frame.shape
        frameL = frame[0:height, 0:width // 2]
        frameR = frame[0:height, width // 2:width]

        imgL_pub.publish(bridge.cv2_to_imgmsg(frameL, "bgr8"))
        imgR_pub.publish(bridge.cv2_to_imgmsg(frameR, "bgr8"))

        camL_pub.publish(cam_infoL)
        camR_pub.publish(cam_infoR)
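
The bridge object and the four publishers are created earlier in this script. A sketch of the assumed setup using the names referenced above; the node name and CameraInfo topic names are hypothetical:

import rospy
from cv_bridge import CvBridge
from sensor_msgs.msg import CameraInfo, Image

rospy.init_node("stereo_splitter")
bridge = CvBridge()
imgL_pub = rospy.Publisher("imgL", Image, queue_size=10)
imgR_pub = rospy.Publisher("imgR", Image, queue_size=10)
camL_pub = rospy.Publisher("camL_info", CameraInfo, queue_size=10)
camR_pub = rospy.Publisher("camR_info", CameraInfo, queue_size=10)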
Example #12
def main():
    # initialize the video stream and allow the camera sensor to warm up
    print("[INFO] starting video stream...")
    # cap = cv2.VideoCapture(0)
    vs = WebcamVideoStream(src=0).start()
    fps = FPS().start()  #Notes the start time
    width = 440

    with open("consumer_thread.csv", 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            "Thread Frame #",
            "Time spent in reading the frame (seconds) from queue",
            "Time spent performing inference on the frame (seconds)"
        ])
        # loop over the frames from the video stream
        #while True:
        while fps._numFrames < args["num_frames"]:
            # grab the frame from the threaded video stream and resize it
            # to have a maximum width of 400 pixels
            # Capture frame-by-frame
            start = timer()
            frame = vs.readFromQueue()
            end = timer()
            # if frame is not None then there was at least one frame in queue
            # when read from the queue and returned. Else queue was empty.
            if frame is not None:
                # update the FPS counter
                fps.update()
                consumerThreadFrameNumber = fps._numFrames
                consumerThreadTimeTakenToReadThisFrame = (end - start)
                print(
                    "[INFO] Consumer Thread : Time taken to read frame number",
                    consumerThreadFrameNumber, "from queue is",
                    consumerThreadTimeTakenToReadThisFrame, "seconds")
                height = frame.shape[0]
                dim = (width, height)
                frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
                # detect faces in the frame and determine if they are wearing a
                # face mask or not
                startInferenceTime = timer()
                (locs, preds) = detect_and_predict_mask(frame, net, model)
                endInferenceTime = timer()
                consumerThreadTimeTakenToPerformInference = (
                    endInferenceTime - startInferenceTime)
                print(
                    "[INFO] Consumer Thread : Time taken to perform inference on consumed frame number",
                    consumerThreadFrameNumber, "is",
                    consumerThreadTimeTakenToPerformInference, "seconds")
                writer.writerow([
                    consumerThreadFrameNumber,
                    consumerThreadTimeTakenToReadThisFrame,
                    consumerThreadTimeTakenToPerformInference
                ])
                for (box, pred) in zip(locs, preds):
                    # unpack the bounding box and predictions
                    (startX, startY, endX, endY) = box
                    (mask, withoutMask) = pred
                    label = "Mask" if mask > withoutMask else "No Mask"
                    color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
                    # include the probability in the label
                    #label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
                    # display the label and bounding box rectangle on the output
                    # frame
                    cv2.putText(frame, label, (startX, startY - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
                    cv2.rectangle(frame, (startX, startY), (endX, endY), color,
                                  2)
                    print("Showing frame")
                    # show the output frame
                    cv2.imshow("Output", frame)
                    #cv2.destroyAllWindows()
                    #key = cv2.waitKey(10) & 0xFF

                key = cv2.waitKey(1) & 0xFF
                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break

    fps.stop()
    vs.stop()

    print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

    cv2.destroyAllWindows()
Example #13
    def run(self):
        global THRESHOLD_IMG  # Thresholded image
        global RECTS_ARR  # contains current bounding rectangles
        global PASS_COUNTS, FAIL_COUNTS  # total counts
        global AVG_WIDTHS_TOTAL, AVG_HEIGHTS_TOTAL  # average width/height
        global fpsUI  # fps counter
        fpsUI = FPS().start()
        update_display = False

        while not program_state.STOP_PROGRAM:
            fpsUI.update()
            CROPPED = CAPTURE.read()
            # [handle_config.FRAME_HEIGHT_START:handle_config.FRAME_HEIGHT_END, handle_config.FRAME_WIDTH_START:handle_config.FRAME_WIDTH_END]
            GRAY = cv2.cvtColor(CROPPED,
                                cv2.COLOR_BGR2GRAY)  # Turn image to Grayscale
            _, THRESHOLD_IMG = cv2.threshold(GRAY, handle_config.WHITE_THRESH,
                                             255,
                                             0)  # Run threshold on gray image
            DISPLAY_IMG = CROPPED

            if program_state.THRESH_MODE:
                DISPLAY_IMG = THRESHOLD_IMG
            else:
                for lane in range(1, 2):  # handle_config.LANE_COUNT):
                    update_display = False
                    if len(RECTS_ARR[lane]) > 0 and len(BOX_ARR[lane]) > 0:
                        color = handle_config.RED
                        current_rect = RECTS_ARR[lane]
                        current_box = BOX_ARR[lane]

                        if current_rect[1][0] > current_rect[1][1]:
                            w = current_rect[1][0]
                            h = current_rect[1][1]
                        else:
                            w = current_rect[1][1]
                            h = current_rect[1][0]

                        calc_dimensions = variables.dimension_calc(lane, w, h)

                        if variables.is_pass(lane, w, h):
                            color = handle_config.GREEN

                        start_pos = min(
                            [position[0] for position in current_box])
                        high_pos = min(
                            [position[1] for position in current_box])
                        highest_pos = max(
                            [position[1] for position in current_box])
                        low_pos = max(
                            [position[1] for position in current_box]) + 15

                        exiting_box = handle_config.LANE_HEIGHT_START + \
                            handle_config.LANE_HEIGHT - \
                            handle_config.EDGE_GAP

                        if highest_pos > exiting_box:
                            cv2.drawContours(CROPPED, [current_box], 0,
                                             handle_config.ORANGE, 2)
                        else:
                            cv2.drawContours(CROPPED, [current_box], 0, color,
                                             2)
                            cv2.putText(CROPPED, calc_dimensions,
                                        (start_pos, high_pos),
                                        handle_config.FONT, 1, color, 2)
                            update_display = True

                        if program_state.CALIBRATE_MODE:
                            pixel_dimensions = '{0}px x {1}px'.format(
                                int(w), int(h))
                            cv2.putText(CROPPED, pixel_dimensions,
                                        (start_pos, low_pos),
                                        handle_config.FONT, 1,
                                        handle_config.RED, 2)

                if program_state.REQUEST_CALIBRATE:
                    for lane in range(
                            handle_config.LANE_COUNT):  # loop through lanes
                        if RECTS_ARR[lane]:  # if lane has contour
                            current_rect = RECTS_ARR[lane]
                            if current_rect[1][0] > current_rect[1][1]:
                                w = current_rect[1][0]
                                h = current_rect[1][1]
                            else:
                                w = current_rect[1][1]
                                h = current_rect[1][0]

                            handle_config.PIXEL_WIDTHS[lane] = w
                            handle_config.PIXEL_HEIGHTS[lane] = h

                    # Adjust ratios to match calibration
                    for index, width in enumerate(handle_config.PIXEL_WIDTHS):
                        handle_config.WIDTH_RATIOS[
                            index] = handle_config.ACTUAL_WIDTH / width
                    for index, height in enumerate(
                            handle_config.PIXEL_HEIGHTS):
                        handle_config.HEIGHT_RATIOS[
                            index] = handle_config.ACTUAL_HEIGHT / height

                    # Update low highs for calibration
                    for index, ratio in enumerate(handle_config.WIDTH_RATIOS):
                        handle_config.LANE_FAIL_WIDTHS_LOW[
                            index] = handle_config.FAIL_WIDTH_LOW / ratio
                    for index, ratio in enumerate(handle_config.WIDTH_RATIOS):
                        handle_config.LANE_FAIL_WIDTHS_HIGH[
                            index] = handle_config.FAIL_WIDTH_HIGH / ratio
                    for index, ratio in enumerate(handle_config.HEIGHT_RATIOS):
                        handle_config.LANE_FAIL_HEIGHTS_LOW[
                            index] = handle_config.FAIL_HEIGHT_LOW / ratio
                    for index, ratio in enumerate(handle_config.HEIGHT_RATIOS):
                        handle_config.LANE_FAIL_HEIGHTS_HIGH[
                            index] = handle_config.FAIL_HEIGHT_HIGH / ratio
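                    # Example with illustrative numbers: ACTUAL_WIDTH = 100 mm
                    # and a calibrated pixel width of 200 px give a
                    # WIDTH_RATIO of 0.5 mm/px, so FAIL_WIDTH_LOW = 90 mm
                    # maps to a 180 px threshold.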

                    handle_config.setValue('CALIBRATION', 'PIXEL_WIDTHS',
                                           handle_config.PIXEL_WIDTHS)
                    handle_config.setValue('CALIBRATION', 'PIXEL_HEIGHTS',
                                           handle_config.PIXEL_HEIGHTS)

                    program_state.request_calibration(False)

                # Not Thresh Mode
                not_calib_statement = (
                    lane for lane in range(handle_config.LANE_COUNT)
                    if not program_state.CALIBRATE_MODE)
                for lane in not_calib_statement:
                    AVG_TEXT = '% PASSED: 0'
                    AVG_WIDTHS_TEXT = 'AVG LENGTH: ' + str(
                        int(AVG_WIDTHS_TOTAL[lane][0] *
                            handle_config.WIDTH_RATIOS[lane])) + 'mm'
                    AVG_HEIGHTS_TEXT = 'AVG THICKNESS: ' + str(
                        int(AVG_HEIGHTS_TOTAL[lane][0] *
                            handle_config.HEIGHT_RATIOS[lane])) + 'mm'
                    if PASS_COUNTS[lane] > 0:
                        AVG_TEXT = '% PASSED: ' + str(
                            100 * PASS_COUNTS[lane] /
                            (PASS_COUNTS[lane] + FAIL_COUNTS[lane]))
                    cv2.putText(CROPPED, 'LANE ' + str(lane + 1),
                                (handle_config.LANE_WIDTH_START[lane],
                                 handle_config.TEXT_Y), handle_config.FONT, 1,
                                handle_config.RED, 2)
                    cv2.putText(CROPPED, 'PASS: ' + str(PASS_COUNTS[lane]) +
                                ' FAIL: ' + str(FAIL_COUNTS[lane]),
                                (handle_config.LANE_WIDTH_START[lane],
                                 handle_config.TEXT_Y + 60),
                                handle_config.FONT, 1, handle_config.RED, 2)
                    cv2.putText(CROPPED, AVG_TEXT,
                                (handle_config.LANE_WIDTH_START[lane],
                                 handle_config.TEXT_Y + 90),
                                handle_config.FONT, 1, handle_config.RED, 2)
                    if AVG_WIDTHS_TOTAL[lane][0] > 0:
                        cv2.putText(CROPPED, AVG_WIDTHS_TEXT,
                                    (handle_config.LANE_WIDTH_START[lane],
                                     handle_config.TEXT_Y + 120),
                                    handle_config.FONT, 1, handle_config.RED,
                                    2)
                    if AVG_HEIGHTS_TOTAL[lane][0] > 0:
                        cv2.putText(CROPPED, AVG_HEIGHTS_TEXT,
                                    (handle_config.LANE_WIDTH_START[lane],
                                     handle_config.TEXT_Y + 150),
                                    handle_config.FONT, 1, handle_config.RED,
                                    2)

                # Show Lane Boundaries
                cv2.rectangle(CROPPED,
                              (handle_config.LANE_X1, handle_config.LANE_Y1),
                              (handle_config.LANE_X2, handle_config.LANE_Y2),
                              handle_config.YELLOW, 2)
                cv2.rectangle(CROPPED,
                              (handle_config.SPLIT_X1, handle_config.LANE_Y1),
                              (handle_config.SPLIT_X2, handle_config.LANE_Y2),
                              handle_config.YELLOW, 2)
                cv2.rectangle(CROPPED,
                              (handle_config.SPLIT_X3, handle_config.LANE_Y1),
                              (handle_config.SPLIT_X4, handle_config.LANE_Y2),
                              handle_config.YELLOW, 2)

                # Lane Traffic Light calculations (lanes 1-4)
                red_fail = '111111'
                yellow_fail = '111'

                for lane_no in range(1, 5):
                    traffic_colour = handle_config.GREEN
                    lane_history = ''.join(
                        str(e) for e in HISTORICAL_FAILS[lane_no - 1])
                    if red_fail in lane_history:
                        traffic_colour = handle_config.RED
                    elif yellow_fail in lane_history:
                        traffic_colour = handle_config.YELLOW
                    x1 = getattr(handle_config,
                                 'TRAFFIC_LANE_{}_X1'.format(lane_no))
                    x2 = getattr(handle_config,
                                 'TRAFFIC_LANE_{}_X2'.format(lane_no))
                    cv2.rectangle(CROPPED, (x1, handle_config.TRAFFIC_Y1),
                                  (x2, handle_config.TRAFFIC_Y2),
                                  traffic_colour, -1)

                # Show Low Cost Automation Banner
                cv2.putText(CROPPED, 'EasiBake Roll Checker', (760, 100),
                            handle_config.FONT, 3, handle_config.RED, 4)
                cv2.putText(CROPPED, 'Low Cost Automation Ltd', (900, 160),
                            handle_config.FONT, 2, handle_config.RED, 3)

                # Show current AIO
                cv2.putText(CROPPED, 'OUTPUT: ' + str(OUTPUT), (950, 890),
                            handle_config.FONT, 1, handle_config.RED, 2)

                # Show min/max values
                max_length = 'MAX LENGTH = ' + str(
                    int(handle_config.FAIL_WIDTH_HIGH)) + 'mm   '
                min_length = 'MIN LENGTH = ' + str(
                    int(handle_config.FAIL_WIDTH_LOW)) + 'mm    '
                max_thickness = 'MAX THICKNESS = ' + str(
                    int(handle_config.FAIL_HEIGHT_HIGH)) + 'mm'
                min_thickness = 'MIN THICKNESS = ' + str(
                    int(handle_config.FAIL_HEIGHT_LOW)) + 'mm'
                cv2.putText(CROPPED, 'CURRENT REJECT SETTINGS', (240, 1800),
                            handle_config.FONT, 1, handle_config.RED, 2)
                cv2.line(CROPPED, (50, 1815), (875, 1815), handle_config.RED,
                         2)
                cv2.putText(CROPPED, max_length + max_thickness, (50, 1850),
                            handle_config.FONT, 1, handle_config.RED, 2)
                cv2.putText(CROPPED, min_length + min_thickness, (50, 1900),
                            handle_config.FONT, 1, handle_config.RED, 2)

            window_name = 'LINE VIEW'
            if DISPLAY_IMG.size > 0 and update_display:
                cv2.namedWindow(window_name, cv2.WND_PROP_FULLSCREEN)
                cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
                                      cv2.WINDOW_FULLSCREEN)
                DISPLAY_IMG = cv2.resize(DISPLAY_IMG, (1600, 900),
                                         interpolation=cv2.INTER_AREA)
                cv2.imshow(window_name, DISPLAY_IMG)

            # Required for loop no need for key read
            _ = cv2.waitKey(1) & 0xFF
Example #14
class CamThreadController:
    """Separate thread class responsible for recording the files.
    """
    def __init__(self, client, cam, name, gui):
        """Constructor

        Arguments:
            client {object} -- Client owning the camera and its properties.
            cam {object} -- Camera object the frames are recorded from.
            name {str} -- Name used to tag the recorded files.
            gui {GuiPart} -- GUI object to configure the Graphical user interface of the App.
        """
        self.client = client
        self.cam = cam
        self.name = name
        self.duration = 10
        self.gui = gui
        self.video_handler = cv2.VideoWriter()
        self.record_thread = None
        self.stop_record_thread = threading.Event()
        self.fourcc_codec = cv2.VideoWriter_fourcc(*'MJPG')  #XVID
        self.f = None
        self.fps = FPS()

    def getNewFiles(self, videoDirectory, id, name):
        """Generate the names of, and open, a new pair of output files; called at the start of each recording segment.

        Arguments:
            videoDirectory {str} -- Path to the directory receiving the video and the csv files.
            id {str} -- New id for each file.
            name {str} -- Name used to tag the files.

        Returns:
            [str] -- frames file name.
            [str] -- video file name.
        """

        frames_filename = videoDirectory + "track_frames_" + self.name + "_" + id + "_" + time.strftime(
            "%d-%m-%Y-%H-%M-%S") + ".csv"
        self.frames_recorder = open(frames_filename, 'a')
        self.frames_recorder.write("frames_id,frames_timestamp \n")
        self.frames_recorder.close()
        video_filename = videoDirectory + "track_video_" + self.name + "_" + id + "_" + time.strftime(
            "%d-%m-%Y-%H-%M-%S") + ".avi"
        self.video_handler.open(
            video_filename, self.fourcc_codec, 28,
            (self.client.camera.width, self.client.camera.height))
        return frames_filename, video_filename

    def openfiles(self, frames_filename):
        """Open the frames csv file for appending.

        Arguments:
            frames_filename {str} -- File name of the frames csv file.
        """
        self.frames_recorder = open(frames_filename, 'a')

    def write(self, videoHandler, frame, framesHandler, frame_id):
        """Write the frame and its timestamp into the corresponding files.

        Arguments:
            videoHandler {cv2.VideoWriter} -- Video file handler to write the video.
            frame {ndarray} -- Frame to append to the video.
            framesHandler {file} -- Frames file handler to write the csv row.
            frame_id {int} -- The id of each frame from the camera.
        """

        framesHandler.write(
            str(frame_id) + "," + str(datetime.datetime.utcnow()) + "\n")
        videoHandler.write(frame)
        framesHandler.close()

    def record(self, properties, path):
        """Main loop of the recording thread.
        """
        self.fps.start()
        start = time.time()
        id = self.getId(properties)
        frame_id = 0
        frames_filename, video_filename = self.getNewFiles(path, id, self.cam)
        while not self.stop_record_thread.is_set():
            ret, frame = self.cam.read()
            if np.array_equal(self.f, frame):
                continue
            self.fps.update()
            self.f = frame
            currentTime = time.time()
            if (int(currentTime - start) >= self.duration):
                self.fps.stop()
                print("[INFO] elapsed time: {:.2f}".format(self.fps.elapsed()))
                print("[INFO] approx. FPS: {:.2f}".format(self.fps.fps()))
                id = self.getId(properties)
                frames_filename, video_filename = self.getNewFiles(
                    path, id, self.name)
                frame_id = 0
                start = time.time()
                self.fps.start()
            self.openfiles(frames_filename)
            self.write(self.video_handler, frame, self.frames_recorder,
                       frame_id)
            frame_id = frame_id + 1
        self.video_handler.release()

    def start(self, properties, path):
        """Function to start the recording thread
		"""
        self.stop_record_thread.clear()
        self.record_thread = threading.Thread(target=self.record,
                                              args=(
                                                  properties,
                                                  path,
                                              ))
        self.record_thread.setDaemon(True)
        self.record_thread.start()

    def stop(self):
        """Function to stop the recording thread.
        """
        self.stop_record_thread.set()
        self.record_thread.join(4)
        self.record_thread = None
        self.fps.stop()

    def isAlive(self):
        """Function to check if the thread is still alive
		
		Returns:
			[bool] -- Thread running: True, Thread stopped: False
		"""
        return self.record_thread.isAlive()

    def getId(self, properties):
        """Function to get a new ID each time a file is created
		
		Returns:
			[integer] -- ID
		"""
        # Read last available id
        p = Properties()
        with open(properties, 'rb') as f:
            p.load(f, 'utf-8')
        id = str(int(p["video_id"].data))
        # Increment the id in the file
        p["video_id"] = str(int(p["video_id"].data) + 1)
        with open(properties, 'wb') as f:
            p.store(f, encoding="utf-8")

        # Return available id
        return id
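
getId assumes a jproperties-style file holding a video_id counter. A sketch of creating that file (the file name is hypothetical; the API is the jproperties package used above):

from jproperties import Properties

# One-time creation of the counter file read and incremented by getId()
p = Properties()
p["video_id"] = "0"
with open("config.properties", "wb") as f:
    p.store(f, encoding="utf-8")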
Example #15
class ThreadedClient:
    """
    Launch the main part of the GUI and the differents threads of the Tool.
    One class responsible of the different control threads 
    """
    def __init__(self, master):
        """
        Start the GUI and the asynchronous threads. We are in the main
        (original) thread of the application, which will later be used by
        the GUI as well.
        """
        self.master = master
        self.gps_destination_file_name = ""
        self.frames_destination_timestamp = ""
        self.video_destination_file_handler = cv2.VideoWriter()
        self.gps_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/GPS/"
        self.video_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/Videos/"
        self.record = False
        self.img = None
        self.frame = None
        self.frame1 = None
        self.f = 0
        self.check = False
        self.brahim = 1.0 / 25

        # p = Properties()
        # with open('config.properties', 'rb') as f:
        #     p.load(f, 'utf-8')

        # print(p["video_id"].data)                     #### incremtentaiton properties file
        # p["video_id"]=str(int(p["video_id"].data) +1)
        # print(p["video_id"].data)
        # with open('config.properties', 'wb') as f:
        #     p.store(f, encoding="utf-8")

        self.frame = None
        # Create the gps queue
        self.gps_queue = Queue()
        # principal thread
        self.running = 1
        self.cameras_state = self.find_cam()
        print(self.cameras_state)
        if self.cameras_state[0]:
            self.camera = See3Cam(src=self.cameras_state[1],
                                  width=1280,
                                  height=720,
                                  framerate=30,
                                  name="CAM")
            self.camera.start()
        if self.cameras_state[2]:
            self.camera1 = See3Cam(src=self.cameras_state[3],
                                   width=1280,
                                   height=720,
                                   framerate=30,
                                   name="CAM1")
            self.camera1.start()

        self.fps = FPS()

        # Set up Gps
        self.gps = GpsCapture()
        # Set up the GUI part
        self.gui = GuiPart(master, self, self.cameras_state, self.gps.running,
                           self.recordData, self.stopRecord)

        # Set up the thread for the GPS checking
        gps_controller = threading.Thread(target=self.checkGpsConnection,
                                          args=(2, ))
        gps_controller.setDaemon(True)
        gps_controller.start()

        # Set up the thread for the camera checking
        camera_controller = threading.Thread(target=self.checkCameraConnection,
                                             args=(2, ))
        camera_controller.setDaemon(True)
        camera_controller.start()

        # # Set up the thread for the video stream
        # camera_controller = threading.Thread(target=self.readCameraThread, args=(self.brahim,))
        # camera_controller.setDaemon(True)
        # camera_controller.start()

        # Set up the thread for the GPS stream
        camera_controller = threading.Thread(
            target=self.readGpsThread, args=(self.brahim, ))  ## refactoring
        camera_controller.setDaemon(True)
        camera_controller.start()

        # Set up the Thread for the data recording
        self.recordingThread = RecordingThreadController(
            self, self.video_destination_dir, self.gps_destination_dir,
            self.gui)

        self.video_output = True
        # Start the periodic call in the GUI .

        self.fps.start()
        self.periodicCall()

    def periodicCall(self):
        """
        Check the connection to the GPS system and camera every 1 ms and
        send their status to the GUI part.
        """

        if self.gps.running or self.frame is not None or self.frame1 is not None:
            self.check = True

        # Update the GUI of the camera and GPS status
        self.gui.processIncoming(self.cameras_state, self.gps.running,
                                 self.video_output, self.frame, self.frame1)

        if not self.running:
            # This is the brutal stop of the system.
            sys.exit(1)
        # # Repeat this function every 1 ms
        # self.f = self.f+1
        # print(self.f)
        self.master.after(1, self.periodicCall)

    def checkGpsConnection(self, interval=1):
        """
        Thread to Check the port connection and the status of the GPS System
        every second.
        """
        while True:
            # Get the GPS port connection
            verify_gps_connection, gps_port = self.gps.get_port()
            if not verify_gps_connection:
                self.gps.running = False
                self.gps.isConnected = False
            elif not self.gps.running:
                self.gps.running = False
                self.gps.open_gps(gps_port, self.gps.baudrate)
            else:
                self.gps.running = True
                self.gps.isConnected = True

            time.sleep(interval)

    def checkCameraConnection(self, interval=1):
        """
        Thread to Check the port connection and the status of the Camera
        every second.
        """
        while True:
            self.cameras_state = self.find_cam()
            if not self.cameras_state[0]:
                time.sleep(1)
                continue
            if not self.cameras_state[2]:
                time.sleep(1)
                continue

            time.sleep(interval)

    def readCameraThread(self, interval=0.05):
        while True:
            if self.cameras_state[0]:
                try:
                    self.grabbed, self.frame = self.camera.read()
                except AttributeError:
                    time.sleep(2)
            if self.cameras_state[2]:
                try:
                    self.grabbed1, self.frame1 = self.camera1.read()
                except AttributeError:
                    time.sleep(2)
            # try:
            #     self.frame = self.detect(self.frame2, net.eval(), transform)
            # except RuntimeError:
            #     print("Runtime error")
            # if  self.recordingThread.record_thread is not None and self.recordingThread.isAlive():
            #     print("putting in cam queue")
            #     self.cam_queue.put(self.frame)
            time.sleep(interval)

            self.fps.update()

    def readGpsThread(self, interval=1):

        while True:

            if self.gps.running:
                self.gps.read()

            time.sleep(interval)
            # if  self.recordingThread.record_thread is not None and self.recordingThread.isAlive():
            #     print("Putting in gps queue")
            #     self.gps_queue.put(self.gps)

    def recordData(self):
        """
        This function listens to the record button and starts the recording thread accordingly
        """
        if self.check:
            if not self.record:
                self.video_output = False
                self.recordingThread.start()
                self.record = True
                self.gui.btn_record.configure(text="Recording", bg="red")
                self.gui.progress_bar.start(int(
                    10000 / 100))  #  duration of videos divided by 100
            else:
                print("Already recording")
        else:
            print("Cannot record, there is no device connected!")

    def stopRecord(self):
        """
        This function listens to the record button and starts the recording thread accordingly
        """
        if self.record:
            self.video_output = True
            self.recordingThread.stop()
            self.record = False
            self.gui.btn_record.configure(text="Record Data", bg="green")
            self.gui.progress_bar.stop()
        else:
            print("There is no recording")

    def find_cam(self):
        camera_indexes = []
        cmd = ["/usr/bin/v4l2-ctl", "--list-devices"]
        out, err = subprocess.Popen(cmd, stdout=PIPE,
                                    stderr=PIPE).communicate()
        out, err = out.strip(), err.strip()
        for l in [i.split(b'\n\t') for i in out.split(b'\n\n')]:
            if "See3CAM_CU20" in l[0].decode(encoding="UTF-8"):
                camera_indexes.append(int(l[1].decode(encoding="UTF-8")[-1]))

        if len(camera_indexes) == 2:
            return (True, camera_indexes[0], True, camera_indexes[1])
        elif len(camera_indexes) == 1:
            return (True, camera_indexes[0], False, None)
        else:
            return (False, None, False, None)

    def detect(self, frame, net, transform):
        """Run the SSD network on a frame and draw a filled rectangle plus a
        class label over every detected object of interest. Returns the
        annotated frame."""
        height, width = frame.shape[:2]
        # Apply the transform, convert the frame to a torch tensor (C x H x W)
        # and add a fake batch dimension before feeding the network.
        frame_t = transform(frame)[0]
        x = torch.from_numpy(frame_t).permute(2, 0, 1)
        x = Variable(x.unsqueeze(0))
        x = x.cuda()
        y = net(x)
        detections = y.data
        # Scale factors to map normalized box coordinates back to pixels.
        scale = torch.Tensor([width, height, width, height])
        for i in range(detections.size(1)):  # For every class:
            if labelmap[i - 1] in ("car", "person", "motorbike", "bicycle",
                                   "bus"):
                # Walk the occurrences j of class i while their matching score
                # is at least 0.6 (scores are sorted in decreasing order);
                # bound j so we never index past the detections tensor.
                j = 0
                while (j < detections.size(2)
                       and detections[0, i, j, 0] >= 0.6):
                    # Upper-left and lower-right corners of the detection box.
                    pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                    topLeft = (int(pt[0]), int(pt[1]))
                    bottomRight = (int(pt[2]), int(pt[3]))
                    x, y = max(topLeft[0], 0), max(topLeft[1], 0)
                    w = bottomRight[0] - topLeft[0]
                    h = bottomRight[1] - topLeft[1]
                    # print("x =", x, " y =", y, " w =", w, " h =", h)
                    # Mask the detected object with a filled rectangle and put
                    # the class label at its top-left corner.
                    cv2.rectangle(frame, topLeft, bottomRight, (0, 0, 0),
                                  cv2.FILLED)
                    cv2.putText(frame, labelmap[i - 1], topLeft,
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255),
                                2, cv2.LINE_AA)
                    j += 1
        return frame
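
    # A minimal usage sketch (hedged): `net` and `transform` come from outside
    # this snippet; in the commented-out caller earlier in this file they are
    # an SSD model in eval mode and its input transform (e.g. a
    # BaseTransform-style resize and mean subtraction):
    #
    #   frame = self.detect(frame, net.eval(), transform)
    #   cv2.imshow("Detections", frame)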
Example #16
class zed_camera(object):
    """Stereo streamer for a ZED camera: rectifies the side-by-side image,
    computes a WLS-filtered SGBM disparity map and publishes both halves on ROS."""
    def __init__(self, arg):
        super(zed_camera, self).__init__()
        #cap = VideoStream(src=0, resolution=(2560,720)).start()
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        self.fps = FPS()

        self.imgUL = None
        self.imgUR = None

        rospy.init_node("stereo_streamer")
        self.imgL_pub = rospy.Publisher("imgL", Image, queue_size=10)
        self.imgR_pub = rospy.Publisher("imgR", Image, queue_size=10)

        self.left_matcher = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=160,  # must be divisible by 16, e.g. 160, 192, 256
            blockSize=5,
            disp12MaxDiff=1,
            uniquenessRatio=15,
            speckleWindowSize=0,
            speckleRange=2,
            preFilterCap=63,
            mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)

        self.right_matcher = cv2.ximgproc.createRightMatcher(self.left_matcher)

        # lmbda and sigma are assumed to be module-level WLS filter constants
        self.wls_filter = cv2.ximgproc.createDisparityWLSFilter(
            matcher_left=self.left_matcher)
        self.wls_filter.setLambda(lmbda)
        self.wls_filter.setSigmaColor(sigma)

        self.mapLx, self.mapLy = cv2.initUndistortRectifyMap(
            camera_matrix_L, dist_coeff_L, RL, PL, (2560 // 2, 720),
            cv2.CV_32FC1)

        self.mapRx, self.mapRy = cv2.initUndistortRectifyMap(
            camera_matrix_R, dist_coeff_R, RR, PR, (2560 // 2, 720),
            cv2.CV_32FC1)

    def read(self):
        _, frame = self.cap.read()
        height, width = frame.shape[:2]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # The ZED delivers both views side by side: left half, then right half.
        frameL = frame[0:height, 0:width // 2]
        self.imgUL = cv2.resize(cv2.remap(frameL, self.mapLx, self.mapLy,
                                          cv2.INTER_LINEAR), (0, 0),
                                fx=0.5,
                                fy=0.5)
        frameR = frame[0:height, width // 2:width]
        self.imgUR = cv2.resize(cv2.remap(frameR, self.mapRx, self.mapRy,
                                          cv2.INTER_LINEAR), (0, 0),
                                fx=0.5,
                                fy=0.5)

        displ = self.left_matcher.compute(self.imgUL,
                                          self.imgUR)  # .astype(np.float32)/16
        dispr = self.right_matcher.compute(self.imgUR, self.imgUL)
        displ = np.int16(displ)
        dispr = np.int16(dispr)

        filteredImg = self.wls_filter.filter(displ, self.imgUL, None, dispr)
        filteredImg = cv2.normalize(src=filteredImg,
                                    dst=filteredImg,
                                    beta=0,
                                    alpha=255,
                                    norm_type=cv2.NORM_MINMAX)
        filteredImg = np.uint8(filteredImg)

        self.fps.update()

    def stream(self):
        # The rectified halves are single-channel (grayscale) after the
        # conversion in read(), so publish them as "mono8"; bridge is assumed
        # to be a module-level CvBridge() instance.
        self.imgL_pub.publish(bridge.cv2_to_imgmsg(self.imgUL, "mono8"))
        self.imgR_pub.publish(bridge.cv2_to_imgmsg(self.imgUR, "mono8"))
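
    # A minimal driver sketch (hedged; assumes the module-level calibration
    # matrices, lmbda/sigma and CvBridge globals used above are defined):
    #
    #   cam = zed_camera(None)
    #   rate = rospy.Rate(30)
    #   while not rospy.is_shutdown():
    #       cam.read()
    #       cam.stream()
    #       rate.sleep()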
Example #17
out = cv2.VideoWriter('output.avi', -1, 20.0, (640, 400))  # size must match the resized frames

font = cv2.FONT_HERSHEY_SIMPLEX
# this is used to test the relative fps of the threaded and unthreaded options

# grab a pointer to the video stream and initialize the FPS counter
print("[INFO] sampling frames without Threading...")
stream = cv2.VideoCapture('test.avi')

#proc = Process(target=frame_display, args=(taskqueue, outqueue))
#proc.start()

fps = FPS()
fps.start()

# loop over some frames
while fps._numFrames < 100:
    # grab the frame from the stream and resize it to 640x400
    (grabbed, frame) = stream.read()
    if not grabbed:
        break
    frame = cv2.resize(frame, (640, 400))
    out.write(frame)
    #cv2.imshow("Frame", frame)
    #key = cv2.waitKey(1) & 0xFF

    fps.update()

fps.stop()
stream.release()
out.release()
print("[INFO] unthreaded elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] unthreaded approx. FPS: {:.2f}".format(fps.fps()))
 
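# For comparison, a hedged sketch of the threaded variant announced above
# (assumes FPS is imutils.video.FPS and that imutils is installed; same
# 'test.avi' input as before):
from imutils.video import FileVideoStream

print("[INFO] sampling frames with Threading...")
fvs = FileVideoStream('test.avi').start()  # decoding happens in a background thread
fps_threaded = FPS().start()

while fps_threaded._numFrames < 100 and fvs.more():
    frame = fvs.read()
    frame = cv2.resize(frame, (640, 400))
    fps_threaded.update()

fps_threaded.stop()
fvs.stop()
print("[INFO] threaded elapsed time: {:.2f}".format(fps_threaded.elapsed()))
print("[INFO] threaded approx. FPS: {:.2f}".format(fps_threaded.fps()))
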
class BlazeposeOpenvino:
    def __init__(self,
                 input_src=None,
                 pd_xml=POSE_DETECTION_MODEL,
                 pd_device="CPU",
                 pd_score_thresh=0.5,
                 pd_nms_thresh=0.3,
                 lm_xml=FULL_BODY_LANDMARK_MODEL,
                 lm_device="CPU",
                 lm_score_threshold=0.7,
                 full_body=True,
                 use_gesture=False,
                 smoothing=True,
                 filter_window_size=5,
                 filter_velocity_scale=10,
                 show_3d=False,
                 crop=False,
                 multi_detection=False,
                 force_detection=False,
                 output=None):

        self.pd_score_thresh = pd_score_thresh
        self.pd_nms_thresh = pd_nms_thresh
        self.lm_score_threshold = lm_score_threshold
        self.full_body = full_body
        self.use_gesture = use_gesture
        self.smoothing = smoothing
        self.show_3d = show_3d
        self.crop = crop
        self.multi_detection = multi_detection
        self.force_detection = force_detection
        if self.multi_detection:
            print(
                "Warning: with multi-detection, smoothing filter is disabled and pose detection is forced on every frame."
            )
            self.smoothing = False
            self.force_detection = True

        if input_src.endswith('.jpg') or input_src.endswith('.png'):
            self.input_type = "image"
            self.img = cv2.imread(input_src)
            self.video_fps = 25
            video_height, video_width = self.img.shape[:2]
        else:
            self.input_type = "video"
            if input_src.isdigit():
                input_type = "webcam"
                input_src = int(input_src)
            self.cap = cv2.VideoCapture(input_src)
            self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS))
            video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print("Video FPS:", self.video_fps)

        # The full body landmark model predicts 39 landmarks.
        # We are interested in the first 35 landmarks:
        # 1 to 33 correspond to the well-documented body parts,
        # the 34th (mid hips) and 35th (a point above the head) are used to predict the ROI of the next frame.
        # Same for the upper body model, but with 8 fewer landmarks.
        self.nb_lms = 35 if self.full_body else 27

        if self.smoothing:
            self.filter = mpu.LandmarksSmoothingFilter(filter_window_size,
                                                       filter_velocity_scale,
                                                       (self.nb_lms - 2, 3))

        # Create SSD anchors
        # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt
        anchor_options = mpu.SSDAnchorOptions(
            num_layers=4,
            min_scale=0.1484375,
            max_scale=0.75,
            input_size_height=128,
            input_size_width=128,
            anchor_offset_x=0.5,
            anchor_offset_y=0.5,
            strides=[8, 16, 16, 16],
            aspect_ratios=[1.0],
            reduce_boxes_in_lowest_layer=False,
            interpolated_scale_aspect_ratio=1.0,
            fixed_anchor_size=True)
        self.anchors = mpu.generate_anchors(anchor_options)
        self.nb_anchors = self.anchors.shape[0]
        print(f"{self.nb_anchors} anchors have been created")

        # Load Openvino models
        self.load_models(pd_xml, pd_device, lm_xml, lm_device)

        # Rendering flags
        self.show_pd_box = False
        self.show_pd_kps = False
        self.show_rot_rect = False
        self.show_landmarks = True
        self.show_scores = False
        self.show_gesture = self.use_gesture
        self.show_fps = True
        self.show_segmentation = False

        if self.show_3d:
            self.vis3d = o3d.visualization.Visualizer()
            self.vis3d.create_window()
            opt = self.vis3d.get_render_option()
            opt.background_color = np.asarray([0, 0, 0])
            z = min(video_height, video_width) / 3
            self.grid_floor = create_grid([0, video_height, -z],
                                          [video_width, video_height, -z],
                                          [video_width, video_height, z],
                                          [0, video_height, z],
                                          5,
                                          2,
                                          color=(1, 1, 1))
            self.grid_wall = create_grid([0, 0, z], [video_width, 0, z],
                                         [video_width, video_height, z],
                                         [0, video_height, z],
                                         5,
                                         2,
                                         color=(1, 1, 1))
            self.vis3d.add_geometry(self.grid_floor)
            self.vis3d.add_geometry(self.grid_wall)
            view_control = self.vis3d.get_view_control()
            view_control.set_up(np.array([0, -1, 0]))
            view_control.set_front(np.array([0, 0, -1]))

        if output is None:
            self.output = None
        else:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            self.output = cv2.VideoWriter(output, fourcc, self.video_fps,
                                          (video_width, video_height))

    def load_models(self, pd_xml, pd_device, lm_xml, lm_device):

        print("Loading Inference Engine")
        self.ie = IECore()
        print("Device info:")
        versions = self.ie.get_versions(pd_device)
        print("{}{}".format(" " * 8, pd_device))
        print("{}MKLDNNPlugin version ......... {}.{}".format(
            " " * 8, versions[pd_device].major, versions[pd_device].minor))
        print("{}Build ........... {}".format(
            " " * 8, versions[pd_device].build_number))

        # Pose detection model
        pd_name = os.path.splitext(pd_xml)[0]
        pd_bin = pd_name + '.bin'
        print(
            "Pose Detection model - Reading network files:\n\t{}\n\t{}".format(
                pd_xml, pd_bin))
        self.pd_net = self.ie.read_network(model=pd_xml, weights=pd_bin)
        # Input blob: input - shape: [1, 3, 128, 128]
        # Output blob: classificators - shape: [1, 896, 1] : scores
        # Output blob: regressors - shape: [1, 896, 12] : bboxes
        self.pd_input_blob = next(iter(self.pd_net.input_info))
        print(
            f"Input blob: {self.pd_input_blob} - shape: {self.pd_net.input_info[self.pd_input_blob].input_data.shape}"
        )
        _, _, self.pd_h, self.pd_w = self.pd_net.input_info[
            self.pd_input_blob].input_data.shape
        for o in self.pd_net.outputs.keys():
            print(f"Output blob: {o} - shape: {self.pd_net.outputs[o].shape}")
        # These blob names are constants of the model, so set them once outside the loop
        self.pd_scores = "classificators"
        self.pd_bboxes = "regressors"
        print("Loading pose detection model into the plugin")
        self.pd_exec_net = self.ie.load_network(network=self.pd_net,
                                                num_requests=1,
                                                device_name=pd_device)
        self.pd_infer_time_cumul = 0
        self.pd_infer_nb = 0

        self.infer_nb = 0
        self.infer_time_cumul = 0

        # Landmarks model
        if lm_device != pd_device:
            print("Device info:")
            versions = self.ie.get_versions(lm_device)
            print("{}{}".format(" " * 8, lm_device))
            print("{}MKLDNNPlugin version ......... {}.{}".format(
                " " * 8, versions[lm_device].major, versions[lm_device].minor))
            print("{}Build ........... {}".format(
                " " * 8, versions[lm_device].build_number))

        lm_name = os.path.splitext(lm_xml)[0]
        lm_bin = lm_name + '.bin'
        print("Landmark model - Reading network files:\n\t{}\n\t{}".format(
            lm_xml, lm_bin))
        self.lm_net = self.ie.read_network(model=lm_xml, weights=lm_bin)
        # Input blob: input_1 - shape: [1, 3, 256, 256]
        # Output blob: ld_3d - shape: [1, 195]  for full body or [1, 155] for upper body
        # Output blob: output_poseflag - shape: [1, 1]
        # Output blob: output_segmentation - shape: [1, 1, 128, 128]
        self.lm_input_blob = next(iter(self.lm_net.input_info))
        print(
            f"Input blob: {self.lm_input_blob} - shape: {self.lm_net.input_info[self.lm_input_blob].input_data.shape}"
        )
        _, _, self.lm_h, self.lm_w = self.lm_net.input_info[
            self.lm_input_blob].input_data.shape
        for o in self.lm_net.outputs.keys():
            print(f"Output blob: {o} - shape: {self.lm_net.outputs[o].shape}")
        self.lm_score = "output_poseflag"
        self.lm_segmentation = "output_segmentation"
        self.lm_landmarks = "ld_3d"
        print("Loading landmark model to the plugin")
        self.lm_exec_net = self.ie.load_network(network=self.lm_net,
                                                num_requests=1,
                                                device_name=lm_device)
        self.lm_infer_time_cumul = 0
        self.lm_infer_nb = 0

    def pd_postprocess(self, inference):
        scores = np.squeeze(inference[self.pd_scores])  # 896
        bboxes = inference[self.pd_bboxes][0]  # 896x12
        # Decode bboxes
        self.regions = mpu.decode_bboxes(self.pd_score_thresh,
                                         scores,
                                         bboxes,
                                         self.anchors,
                                         best_only=not self.multi_detection)
        # Non maximum suppression (not needed if best_only is True)
        if self.multi_detection:
            self.regions = mpu.non_max_suppression(self.regions,
                                                   self.pd_nms_thresh)

        mpu.detections_to_rect(self.regions,
                               kp_pair=[0, 1] if self.full_body else [2, 3])
        mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)

    def pd_render(self, frame):
        for r in self.regions:
            if self.show_pd_box:
                box = (np.array(r.pd_box) * self.frame_size).astype(int)
                cv2.rectangle(frame, (box[0], box[1]),
                              (box[0] + box[2], box[1] + box[3]), (0, 255, 0),
                              2)
            if self.show_pd_kps:
                # Key point 0 - mid hip center
                # Key point 1 - point that encodes size & rotation (for full body)
                # Key point 2 - mid shoulder center
                # Key point 3 - point that encodes size & rotation (for upper body)
                if self.full_body:
                    # Only kp 0 and 1 used
                    list_kps = [0, 1]
                else:
                    # Only kp 2 and 3 used for upper body
                    list_kps = [2, 3]
                for kp in list_kps:
                    x = int(r.pd_kps[kp][0] * self.frame_size)
                    y = int(r.pd_kps[kp][1] * self.frame_size)
                    cv2.circle(frame, (x, y), 3, (0, 0, 255), -1)
                    cv2.putText(frame, str(kp), (x, y + 12),
                                cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2)
            if self.show_scores and r.pd_score is not None:
                cv2.putText(frame, f"Pose score: {r.pd_score:.2f}",
                            (50, self.frame_size // 2), cv2.FONT_HERSHEY_PLAIN,
                            2, (255, 255, 0), 2)

    def lm_postprocess(self, region, inference):
        region.lm_score = np.squeeze(inference[self.lm_score])
        if region.lm_score > self.lm_score_threshold:
            self.nb_active_regions += 1

            lm_raw = inference[self.lm_landmarks].reshape(-1, 5)
            # Each keypoint carries 5 values:
            # - X,Y coordinates are local to the region of
            # interest and range from [0.0, 255.0].
            # - Z coordinate is measured in "image pixels" like
            # the X and Y coordinates and represents the
            # distance relative to the plane of the subject's
            # hips, which is the origin of the Z axis. Negative
            # values are between the hips and the camera;
            # positive values are behind the hips. The Z scale is
            # similar to the X and Y scales, but it is of a
            # different nature: it is obtained not via human
            # annotation but by fitting synthetic data (the GHUM
            # model) to the 2D annotation.
            # - Visibility, after user-applied sigmoid denotes the
            # probability that a keypoint is located within the
            # frame and not occluded by another bigger body
            # part or another object.
            # - Presence, after user-applied sigmoid denotes the
            # probability that a keypoint is located within the
            # frame.

            # Normalize x,y,z. Here self.lm_w = self.lm_h and scaling in z = scaling in x = 1/self.lm_w
            lm_raw[:, :3] /= self.lm_w
            # Apply sigmoid on visibility and presence (if used later)
            # lm_raw[:,3:5] = 1 / (1 + np.exp(-lm_raw[:,3:5]))

            # region.landmarks contains the landmarks normalized 3D coordinates in the relative oriented body bounding box
            region.landmarks = lm_raw[:, :3]
            # Calculate the landmark coordinate in square padded image (region.landmarks_padded)
            src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32)
            dst = np.array(
                [(x, y) for x, y in region.rect_points[1:]], dtype=np.float32
            )  # region.rect_points[0] is left bottom point and points going clockwise!
            mat = cv2.getAffineTransform(src, dst)
            lm_xy = np.expand_dims(region.landmarks[:self.nb_lms, :2], axis=0)
            lm_xy = np.squeeze(cv2.transform(lm_xy, mat))
            # A segment of length 1 in the coordinates system of body bounding box takes region.rect_w_a pixels in the
            # original image. Then I arbitrarily divide by 4 for a more realistic appearance.
            lm_z = region.landmarks[:self.nb_lms, 2:3] * region.rect_w_a / 4
            lm_xyz = np.hstack((lm_xy, lm_z))
            if self.smoothing:
                lm_xyz = self.filter.apply(lm_xyz)
            region.landmarks_padded = lm_xyz.astype(int)  # np.int is deprecated in recent NumPy
            # If we added padding to make the image square, we need to remove this padding from landmark coordinates
            # region.landmarks_abs contains absolute landmark coordinates in the original image (padding removed))
            region.landmarks_abs = region.landmarks_padded.copy()
            if self.pad_h > 0:
                region.landmarks_abs[:, 1] -= self.pad_h
            if self.pad_w > 0:
                region.landmarks_abs[:, 0] -= self.pad_w

            if self.use_gesture: self.recognize_gesture(region)

            if self.show_segmentation:
                self.seg = np.squeeze(inference[self.lm_segmentation])
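                # squash the raw segmentation logits to [0, 1] with a sigmoid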
                self.seg = 1 / (1 + np.exp(-self.seg))

    def lm_render(self, frame, region):
        if region.lm_score > self.lm_score_threshold:
            if self.show_segmentation:
                ret, mask = cv2.threshold(self.seg, 0.5, 1, cv2.THRESH_BINARY)
                mask = (mask * 255).astype(np.uint8)
                cv2.imshow("seg", self.seg)
                # cv2.imshow("mask", mask)
                src = np.array(
                    [[0, 0], [128, 0], [128, 128]],
                    dtype=np.float32)  # rect_points[0] is left bottom point !
                dst = np.array(region.rect_points[1:], dtype=np.float32)
                mat = cv2.getAffineTransform(src, dst)
                mask = cv2.warpAffine(mask, mat,
                                      (self.frame_size, self.frame_size))
                # cv2.imshow("mask2", mask)
                # mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                l = frame.shape[0]
                frame2 = cv2.bitwise_and(frame, frame, mask=mask)
                if not self.crop:
                    frame2 = frame2[self.pad_h:l - self.pad_h,
                                    self.pad_w:l - self.pad_w]
                cv2.imshow("Segmentation", frame2)
            if self.show_rot_rect:
                cv2.polylines(frame, [np.array(region.rect_points)], True,
                              (0, 255, 255), 2, cv2.LINE_AA)
            if self.show_landmarks:

                list_connections = LINES_FULL_BODY if self.full_body else LINES_UPPER_BODY
                lines = [
                    np.array(
                        [region.landmarks_padded[point, :2] for point in line])
                    for line in list_connections
                ]
                cv2.polylines(frame, lines, False, (255, 180, 90), 2,
                              cv2.LINE_AA)

                for i, x_y in enumerate(region.landmarks_padded[:self.nb_lms -
                                                                2, :2]):
                    if i > 10:
                        color = (0, 255, 0) if i % 2 == 0 else (0, 0, 255)
                    elif i == 0:
                        color = (0, 255, 255)
                    elif i in [4, 5, 6, 8, 10]:
                        color = (0, 255, 0)
                    else:
                        color = (0, 0, 255)
                    cv2.circle(frame, (x_y[0], x_y[1]), 4, color, -1)  # -1 = filled

                if self.show_3d:
                    points = region.landmarks_abs
                    lines = LINE_MESH_FULL_BODY if self.full_body else LINE_MESH_UPPER_BODY
                    colors = COLORS_FULL_BODY
                    for i, a_b in enumerate(lines):
                        a, b = a_b
                        line = create_segment(points[a],
                                              points[b],
                                              radius=5,
                                              color=colors[i])
                        if line:
                            self.vis3d.add_geometry(line,
                                                    reset_bounding_box=False)

            if self.show_scores:
                cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}",
                            (region.landmarks_padded[24, 0] - 10,
                             region.landmarks_padded[24, 1] + 90),
                            cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
            if self.use_gesture and self.show_gesture:
                cv2.putText(frame, region.gesture,
                            (region.landmarks_padded[6, 0] - 10,
                             region.landmarks_padded[6, 1] - 50),
                            cv2.FONT_HERSHEY_PLAIN, 5, (0, 190, 255), 3)

    def recognize_gesture(self, r):
        def angle_with_y(v):
            # v: 2d vector (x,y)
            # Returns the angle in degrees of v with the y-axis of the image plane
            if v[1] == 0:
                return 90
            angle = atan2(v[0], v[1])
            return np.degrees(angle)

        # For the demo, we want to recognize the flag semaphore alphabet
        # For this task, we just need to measure the angles of both arms with vertical
        right_arm_angle = angle_with_y(r.landmarks_abs[14, :2] -
                                       r.landmarks_abs[12, :2])
        left_arm_angle = angle_with_y(r.landmarks_abs[13, :2] -
                                      r.landmarks_abs[11, :2])
        right_pose = int((right_arm_angle + 202.5) / 45)
        left_pose = int((left_arm_angle + 202.5) / 45)
        r.gesture = semaphore_flag.get((right_pose, left_pose), None)
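
    # Worked example: an arm hanging straight down gives a shoulder-to-elbow
    # vector of roughly (0, +y); angle_with_y() returns atan2(0, y) = 0 degrees,
    # so the pose bucket is int((0 + 202.5) / 45) = 4. Each bucket spans 45
    # degrees, matching the 8 arm positions of the flag semaphore alphabet.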

    def run(self):

        self.fps = FPS(mean_nb_frames=20)

        nb_frames = 0
        nb_pd_inferences = 0
        nb_pd_inferences_direct = 0
        nb_lm_inferences = 0
        nb_lm_inferences_after_landmarks_ROI = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0

        get_new_frame = True
        use_previous_landmarks = False

        global_time = time.perf_counter()
        while True:
            if get_new_frame:
                nb_frames += 1
                if self.input_type == "image":
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                if self.crop:
                    # Cropping the long side to get a square shape
                    self.frame_size = min(h, w)
                    dx = (w - self.frame_size) // 2
                    dy = (h - self.frame_size) // 2
                    video_frame = vid_frame[dy:dy + self.frame_size,
                                            dx:dx + self.frame_size]
                else:
                    # Padding on the small side to get a square shape
                    self.frame_size = max(h, w)
                    self.pad_h = int((self.frame_size - h) / 2)
                    self.pad_w = int((self.frame_size - w) / 2)
                    video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h,
                                                     self.pad_h, self.pad_w,
                                                     self.pad_w,
                                                     cv2.BORDER_CONSTANT)

                annotated_frame = video_frame.copy()

            if not self.force_detection and use_previous_landmarks:
                self.regions = regions_from_landmarks
                mpu.detections_to_rect(
                    self.regions, kp_pair=[0, 1]
                )  # self.regions.pd_kps are initialized from landmarks on previous frame
                mpu.rect_transformation(self.regions, self.frame_size,
                                        self.frame_size)
            else:
                # Infer pose detection
                # Resize image to NN square input shape
                frame_nn = cv2.resize(video_frame, (self.pd_w, self.pd_h),
                                      interpolation=cv2.INTER_AREA)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]

                pd_rtrip_time = now()
                inference = self.pd_exec_net.infer(
                    inputs={self.pd_input_blob: frame_nn})
                glob_pd_rtrip_time += now() - pd_rtrip_time
                self.pd_postprocess(inference)
                self.pd_render(annotated_frame)
                nb_pd_inferences += 1
                if get_new_frame: nb_pd_inferences_direct += 1

            # Landmarks
            self.nb_active_regions = 0
            if self.show_3d:
                self.vis3d.clear_geometries()
                self.vis3d.add_geometry(self.grid_floor,
                                        reset_bounding_box=False)
                self.vis3d.add_geometry(self.grid_wall,
                                        reset_bounding_box=False)
            if self.force_detection:
                for r in self.regions:
                    frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                                 self.lm_w, self.lm_h)
                    # Transpose hxwx3 -> 1x3xhxw
                    frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                    # Get landmarks
                    lm_rtrip_time = now()
                    inference = self.lm_exec_net.infer(
                        inputs={self.lm_input_blob: frame_nn})
                    glob_lm_rtrip_time += now() - lm_rtrip_time
                    nb_lm_inferences += 1
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
            elif len(self.regions) == 1:
                r = self.regions[0]
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_w, self.lm_h)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                # Get landmarks
                lm_rtrip_time = now()
                inference = self.lm_exec_net.infer(
                    inputs={self.lm_input_blob: frame_nn})
                glob_lm_rtrip_time += now() - lm_rtrip_time
                nb_lm_inferences += 1
                if use_previous_landmarks:
                    nb_lm_inferences_after_landmarks_ROI += 1

                self.lm_postprocess(r, inference)
                if not self.force_detection:
                    if get_new_frame:
                        if not use_previous_landmarks:
                            # With a new frame, we have run the landmark NN on a ROI found by the detection NN...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks
                                # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                                regions_from_landmarks = [
                                    mpu.Region(pd_kps=r.landmarks_padded[
                                        self.nb_lms - 2:self.nb_lms, :2] /
                                               self.frame_size)
                                ]
                                use_previous_landmarks = True
                        else:
                            # With a new frame, we have run the landmark NN on a ROI calculated from the landmarks of the previous frame...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks
                                # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                                regions_from_landmarks = [
                                    mpu.Region(pd_kps=r.landmarks_padded[
                                        self.nb_lms - 2:self.nb_lms, :2] /
                                               self.frame_size)
                                ]
                                use_previous_landmarks = True
                            else:
                                # ...and could not find a body
                                # We don't know if it is because the ROI calculated from the previous frame is not reliable (the body moved)
                                # or because there is really no body in the frame. To decide, we have to run the detection NN on this frame
                                get_new_frame = False
                                use_previous_landmarks = False
                                continue
                    else:
                        # On a frame on which we already ran the landmark NN without finding a body,
                        # we have run the detection NN...
                        if r.lm_score > self.lm_score_threshold:
                            # ...and successfully found a body and its landmarks
                            # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                            regions_from_landmarks = [
                                mpu.Region(pd_kps=r.landmarks_padded[
                                    self.nb_lms - 2:self.nb_lms, :2] /
                                           self.frame_size)
                            ]
                            use_previous_landmarks = True
                        # else:
                        # ...and could not find a body
                        # We are sure there is no body in that frame

                        get_new_frame = True
                self.lm_render(annotated_frame, r)
            else:
                # Detection NN hasn't found any body
                get_new_frame = True

            self.fps.update()

            if self.show_3d:
                self.vis3d.poll_events()
                self.vis3d.update_renderer()
            if self.smoothing and self.nb_active_regions == 0:
                self.filter.reset()

            if not self.crop:
                annotated_frame = annotated_frame[self.pad_h:self.pad_h + h,
                                                  self.pad_w:self.pad_w + w]

            if self.show_fps:
                self.fps.display(annotated_frame,
                                 orig=(50, 50),
                                 size=1,
                                 color=(240, 180, 100))
            cv2.imshow("Blazepose", annotated_frame)

            if self.output:
                self.output.write(annotated_frame)

            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_scores = not self.show_scores
            elif key == ord('6'):
                self.show_gesture = not self.show_gesture
            elif key == ord('f'):
                self.show_fps = not self.show_fps
            elif key == ord('s'):
                self.show_segmentation = not self.show_segmentation

        # Print some stats
        print(
            f"FPS : {nb_frames/(time.perf_counter() - global_time):.1f} f/s (# frames = {nb_frames})"
        )
        print(
            f"# pose detection inferences : {nb_pd_inferences} - # direct: {nb_pd_inferences_direct} - # after landmarks ROI failures: {nb_pd_inferences-nb_pd_inferences_direct}"
        )
        print(
            f"# landmark inferences       : {nb_lm_inferences} - # after pose detection: {nb_lm_inferences - nb_lm_inferences_after_landmarks_ROI} - # after landmarks ROI prediction: {nb_lm_inferences_after_landmarks_ROI}"
        )
        print(
            f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms"
        )
        if nb_lm_inferences:
            print(
                f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms"
            )

        if self.output:
            self.output.release()
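
# A minimal way to run the class above (hedged sketch; the argument values are
# illustrative, not taken from the original source):
if __name__ == "__main__":
    bp = BlazeposeOpenvino(input_src="0", full_body=True)  # "0" selects the webcam
    bp.run()
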
class VideoProcessor(object):
    #
    # Initialization of Video Processor Class
    # Reads video frames from the Video Stream class and processes them (AI inference etc.)
    # Sets frame data to displayFrame for visualization
    #
    def __init__(self,
                 videoPath = '/dev/video0',
                 videoW = 1024,
                 videoH = 768,
                 fontScale = 1.0,
                 verbose = True):

        self.verbose = verbose
        self._debug = False

        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        logging.info('===============================================================')
        logging.info('Initializing Video Processor with the following parameters:')
        logging.info('   - OpenCV Version     : {}'.format(cv2.__version__))
        logging.info('   - Device Path        : {}'.format(videoPath))
        logging.info('   - Frame Size         : {}x{}'.format(videoW, videoH))
        logging.info('===============================================================')

        # To send message to clients (Browser)
        self.imageStreamHandler = None
        self.threadExecutor = None

        # Video source
        self.videoData = Video_Data(self, videoPath)
        self.displayFrame = np.array([])
        self.frame_org = np.array([])

        # for Frame Rate measurement
        self.fps = FPS()

        playback_mode = self.videoData.get_playback_mode()
        self._playback_sync = (playback_mode == Video_Playback_Mode.Sync)
        self._fps_target = 30
        self._fps_wait = 1000.0/30
        self.currentFps = 30.0

        # For display
        self._fontScale = float(fontScale)
        self._annotate = False

        # Track states of this object
        self.set_video_processor_state(VideoProcessorState.Unknown)

        # OpenVINO
        self.inference_engine = None
        self.runInference = 0

        self.ioLoop = None
        self.current_model_data = None

#
# Sets up Video Processor Class
# Creates Video Stream Class for video capture
#
    def __enter__(self):
    # async def __aenter__(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        self.set_video_path('{{\"set_video_path\":\"{}\"}}'.format(self.videoData.videoPath))

        self.inference_engine = OpenVINO_Engine(self)

        # with OpenVINO_Util() as openVino:
        #     devices = openVino.get_supported_devices()

        #     for device in devices:
        #         logging.info('>> Device : {0}'.format(device))
        #         fullName = openVino.get_device_name(device)
        #         logging.info('>> Name   : {0}'.format(fullName))
        #         self.inference_engine.hwList.append(device)

        self.inference_engine.initialize_engine()

        return self
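
#
# Typical usage (hedged sketch): VideoProcessor is written as a context manager,
# so it is meant to be driven roughly as
#
#   with VideoProcessor(videoPath='/dev/video0') as vp:
#       # stream frames, e.g. via process_video_frame_async()
#       pass
#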

#
# Clean up Video Processor Class
#
    def __exit__(self, exception_type, exception_value, traceback):
    # async def __aexit__(self, exception_type, exception_value, traceback):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self.threadExecutor:
            self.threadExecutor.shutdown(wait=True)

        self.set_video_processor_state(VideoProcessorState.Stop)

#
# Send message to browser
#
    def send_message(self, msg):
        if self.imageStreamHandler:
            ImageStreamHandler.broadcast_message(msg)

#
# Set Video Processor State flag
#
    def set_video_processor_state(self, flag):
        self._state = flag

#
# Initializes Video Source
#
    def _init_video_source(self):
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        raise NotImplementedError

#
# Sets current video frame for display
#
    def set_display_frame(self, frame):
        assert frame.size > 0, "Frame Empty"
        self.displayFrame = frame

#
# Returns current video frame for display
# Converts to byte data
#
    def get_display_frame(self):

        if self.displayFrame.size == 0:
            if self.videoData.get_video_data_state() == Video_Data_State.PhotoReady:
                if self.videoData.videoH == 0 or self.videoData.videoW == 0:
                    wallpaper = np.zeros((720, 1280, 3), np.uint8)
                else:
                    wallpaper = np.zeros((self.videoData.videoH, self.videoData.videoW, 3), np.uint8)

                ret, buffer = cv2.imencode( '.jpg', wallpaper )
            else:
                return None, 0

        else:
            ret, buffer = cv2.imencode( '.jpg', self.displayFrame )

        if ret and buffer.size > 0:
            return buffer.tobytes(), self.currentFps
        else:
            assert False, '>> Display Frame Empty *************** '

#
# Returns Inference Engine Info
#
    def get_inference_engine_info(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self.inference_engine:
            devices = json.dumps(self.inference_engine.get_devices())
            if self.runInference == 1:
                state = "On"
            else:
                state = "Off"

            return '{{\"{0}\":\"{1}\",\"devices\":{2},\"get_inference_state\":\"{3}\"}}'.format(sys._getframe().f_code.co_name, self.inference_engine.signature, devices, state)

        else:
            assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name)
            return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name)

#
# Retrieve a list of models
#
    def get_model_list(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self.inference_engine:

            json_data = json.loads('{\"get_model_list\":[]}')

            model_list = self.inference_engine.get_model_list()
            for model in model_list:
                json_data["get_model_list"].append(json.loads(model.to_json()))

        else:
            assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name)
            return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name)

        return json_data

#
# Set to keep FPS for video or not
#
    def playback_mode(self, msg):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        jsonData = json.loads(msg)
        playback_mode = jsonData["playback_mode"]

        self._playback_sync = playback_mode == "0"
        self.videoData.set_playback_mode(playback_mode)

        return '{{\"playback_mode\":\"{0}\"}}'.format(self.videoData.get_playback_mode())

#
# Set video playback state
#
    def set_video_playback(self, msg):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        jsonData = json.loads(msg)

        if jsonData['set_video_playback'] == "1":
            self.set_video_processor_state(VideoProcessorState.Running)
        else:
            self.set_video_processor_state(VideoProcessorState.Pause)

        return self.get_video_playback()

#
# Return current video playback state
#
    def get_video_playback(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self._state == VideoProcessorState.Pause:
            state = "0"
        elif self._state == VideoProcessorState.Running:
            state = "1"
        else:
            assert False, "Unexpected Video Processor State"

        return '{{\"get_video_playback\":\"{}\"}}'.format(state)

#
# Stop video process
#
    def set_video_stop(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        self.videoData.set_video_playback(isPause = True)
        self.set_video_processor_state(VideoProcessorState.Pause)

#
# Start video process
#
    def set_video_start(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        self.fps.reset(self.videoData.get_video_fps())
        self.videoData.set_video_playback(isPause = False)
        self.set_video_processor_state(VideoProcessorState.Running)
        self.send_message('{\"frame_ready\":1}')

#
# Set Video Path
#
    def set_video_path(self, msg, loop = None):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        jsonData = json.loads(msg)

        if jsonData.get("set_video_path"):
            videoPath = jsonData["set_video_path"]
        else:
            videoPath = jsonData["videoPath"]

        self.set_video_processor_state(VideoProcessorState.Pause)

        video_data_state = self.videoData.set_video_path(videoPath, loop)

        if video_data_state == Video_Data_State.Running or video_data_state == Video_Data_State.PhotoReady:
            self.fps.reset(self.videoData.get_video_fps())
            self.set_video_start()
        else:
            self.set_video_processor_state(VideoProcessorState.Pause)

        return self.get_video_path()

#
# Return current video path
#
    def get_video_path(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
        
        return self.videoData.get_video_path()

#
# Set Video Resolution
#
    def set_video_resolution(self, msg):
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        return self.videoData.set_video_resolution(msg)
#
# Get Video Resolution
#
    def get_video_resolution(self):

        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        return self.videoData.get_video_resolution()
#
# Set AI model to use
#
    async def set_ai_model(self, loop, msg):
        if self.verbose:
            logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg))

        try:
            self.ioLoop = loop

            #1 Get Model Data
            model_data = self.inference_engine.get_ai_model_data(msg)

            current_hw        = json.loads(self.inference_engine.get_target_device())
            current_precision = json.loads(self.inference_engine.get_precision())

            if model_data.isFlagSet(Model_Flag.Loaded):
                json_data = json.loads(msg)
                device = json_data["set_target_device"]
                precision = json_data["set_precision"]

                if current_hw['get_target_device'] == device and current_precision['get_precision'] == precision:
                    logging.info(">> Model {} is loaded to {}".format(model_data.modelName, current_hw))
                    self.runInference = 1
                    self.send_message('{{\"set_ai_model\":\"Running {}\",\"isComplete\":1}}'.format(model_data.modelName))
            else:
                if self.current_model_data:
                    self.current_model_data.clearFlag(Model_Flag.Loaded)
                    self.current_model_data = None

            if not model_data is None:
                self.set_device_params(msg)
                # self.set_precision(msg)
                # self.set_target_device(msg)

                # create a task to download model from model zoo
                self.set_video_processor_state(VideoProcessorState.Pause)
                self.send_message('{{\"set_ai_model\":\"Downloading {}\"}}'.format(model_data.modelName))
                task = self.ioLoop.run_in_executor(None, self.inference_engine.download_model, model_data)
                task.add_done_callback(self.model_download_callback)
            else:
                json_data = json.loads(msg)
                self.send_message('{{\"set_ai_model\":\"Failed to get model data for {}\",\"isFailure\":1}}'.format(json_data["SetAiModel"]))

        except CancelledError:
            logging.info('-- {0}() - Cancelled'.format(sys._getframe().f_code.co_name))

        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_exception(exc_type, exc_obj, exc_tb)
            logging.error('!! {0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex))
#
# Callback function for model download
#
    def model_download_callback(self, future):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        model_data = future.result()

        # assert on the expression itself; asserting a (value, message) tuple is always true
        assert model_data is not None, "Model Data is None"

        if model_data.isFlagSet(Model_Flag.Downloaded):

            self.send_message('{{\"set_ai_model\":\"{} downloaded.  Converting to IR\"}}'.format(model_data.modelName))

            if model_data.framework == 'dldt':
                task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, model_data)
                task.add_done_callback(self.model_load_callback)
            else:
                task = self.ioLoop.run_in_executor(None, self.inference_engine.convert_model, model_data)
                task.add_done_callback(self.model_convert_callback)
        else:
            self.set_video_start()
            self.send_message('{{\"set_ai_model\":\"Download failed {}\",\"isFailure\":1}}'.format(model_data.errorMsg))

#
# Callback function for model conversion
#
    def model_convert_callback(self, future):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        model_data = future.result()

        if model_data.isFlagSet(Model_Flag.Converted):
            logging.info('   FP16 {}'.format(str(model_data.ir_dir['FP16'])))
            logging.info('   FP32 {}'.format(str(model_data.ir_dir['FP32'])))
            self.send_message('{{\"set_ai_model\":\"{} converted to IR.\\nLoading....\", \"isSuccess\":1}}'.format(model_data.modelName))
            self.inference_engine.remove_model_dir(model_data)
            task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, model_data)
            task.add_done_callback(self.model_load_callback)
        else:
            self.set_video_start()
            self.send_message('{{\"set_ai_model\":\"Convert Failed : {}\",\"isFailure\":1}}'.format(model_data.errorMsg))

#
# Callback function for model load
#
    def model_load_callback(self, future):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        model_data = future.result()

        self.set_video_start()

        if model_data.isFlagSet(Model_Flag.Loaded):

            target_device = json.loads(self.inference_engine.get_target_device())
            
            self.send_message('{{\"set_ai_model\":\"Successfully loaded {}\", \"isComplete\":1}}'.format(model_data.modelName))
            self.send_message('{{\"get_inference_engine_info\":\"{} running on {}\"}}'.format(self.inference_engine.signature, target_device['get_target_device']))
            self.current_model_data = model_data
        else:
            self.send_message('{{\"set_ai_model\":\"Load failed : {}\",\"isFailure\":1}}'.format(model_data.errorMsg))

#
# Set hardware to run inference on
#
    def set_device_params(self, msg, reload = False):
        if self.verbose:
            logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg))

        if self.inference_engine:
            self.inference_engine.set_target_device(msg)
            self.inference_engine.set_precision(msg)

            if reload and self.current_model_data:
                # create a task to reload the current model with the new device/precision
                self.set_video_processor_state(VideoProcessorState.Pause)
                self.send_message('{{\"set_ai_model\":\"Loading {}\"}}'.format(self.current_model_data.modelName))
                task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, self.current_model_data)
                task.add_done_callback(self.model_load_callback)

            return self.inference_engine.set_target_device(msg)
        else:
            assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name)
            return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name)

#
# Set hardware to run inference on
#
    # def set_target_device(self, msg):
    #     if self.verbose:
    #         logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg))

    #     if self.inference_engine:
    #         self.inference_engine.set_target_device(msg)
    #         self.inference_engine.set_precision(msg)

    #         if self.current_model_data:
    #             # create a task to download model from model zoo
    #             self.set_video_processor_state(VideoProcessorState.Pause
    #             self.send_message('{{\"set_ai_model\":\"Loading {}\"}}'.format(self.current_model_data.modelName))
    #             task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, self.current_model_data)
    #             task.add_done_callback(self.model_download_callback)

    #         return self.inference_engine.set_target_device(msg)
    #     else:
    #         assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name)
    #         return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name)

#
# Return hardware to run inference on
#
    def get_target_device(self):

        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self.inference_engine:
            return self.inference_engine.get_target_device()
        else:
            assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name)
            return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name)


#
# Get Inference Precision
#
    def get_precision(self):
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self.inference_engine:
            return self.inference_engine.get_precision()
        else:
            logging.error('>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name))
            return '{{"{}":"Inference Engine Not Set", "isFailure":1}}'.format(sys._getframe().f_code.co_name)

#
# Set Confidence Level threshold
#
    def set_confidence_level(self, msg):
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        jsonData = json.loads(msg)
        confidenceLevel = int(jsonData["set_confidence_level"].replace('%',''))
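        # e.g. msg = '{"set_confidence_level":"60%"}' parses to confidenceLevel == 60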

        if self.inference_engine:
            return self.inference_engine.set_confidence_level(confidenceLevel)
        else:
            logging.error('>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name))
            return '{{"{}":"Inference Engine Not Set", "isFailure":1}}'.format(sys._getframe().f_code.co_name)

#
# Return Confidence Level threshold
#
    def get_confidence_level(self):
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        if self.inference_engine:
            return self.inference_engine.get_confidence_level()
        else:
            logging.error('>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name))
            return '{{"{}":"Inference Engine Not Set", "isFailure":1}}'.format(sys._getframe().f_code.co_name)

#
# Set Inference State
#
    def set_inference_state(self, msg):
        if self.verbose:
            logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg))

        jsonData = json.loads(msg)
        inferenceState = jsonData["set_inference_state"]

        if self.current_model_data:
            # Make sure the model is loaded
            if not self.current_model_data.isFlagSet(Model_Flag.Loaded):
                return '{{\"{}\":\"{} is not loaded\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name, self.current_model_data.modelName)
            else:
                self.runInference = int(inferenceState)
                return self.get_inference_state()
        else:
            return '{{\"{}\":\"Model Data Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name)

#
# Get Current Inference State
#

    def get_inference_state(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
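        # The reply uses the caller's function name as the JSON key,
        # e.g. '{"get_inference_state":"1"}' while inference is enabled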

        return '{{\"{}\":\"{}\"}}'.format(sys._getframe().f_code.co_name, self.runInference)

    async def process_video_frame_async(self, executor):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        try:
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(executor, self.process_video_frame)
            return result

        except CancelledError:
            logging.info('-- {0}() - Cancelled'.format(sys._getframe().f_code.co_name))
            self.set_video_processor_state(VideoProcessorState.Stop)
            return 0

        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_exception(exc_type, exc_obj, exc_tb)
            logging.error('!! {0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex))
            return 1

    #
    # Saves frame data to a file
    #
    def save_image(self):
        cv2.imwrite("./frame.png", self.displayFrame)
        #cv2.imwrite("./frame.png", self.frame_org)

    #
    # Process Video Frame
    #
    def process_video_frame(self):
        if self.verbose:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        textX, textY = cv2.getTextSize("FPS", cv2.FONT_HERSHEY_DUPLEX, self._fontScale, 1)[0]
        textX = int(textX * self._fontScale * 1.1)
        textY = int(textY * self._fontScale * 1.1)

        frame = np.array([])

        self.fps.reset(self.videoData.get_video_fps())

        while True:
            try:
                if self._state == VideoProcessorState.Stop:
                    logging.info('>> {0}:{1}() : Stop Video Processor'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
                    break

                if self._state == VideoProcessorState.Pause:
                    if self._debug:
                        logging.info('>> {0}:{1}() : Pause Video Processor'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
                    time.sleep(1)
                    continue

                if self.videoData is None:
                    logging.info('>> {0}:{1}() : No Video Data'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
                    time.sleep(0.5)
                    continue

                grabbed, frame = self.videoData.read_frame_queue()

                if self._debug:
                    logging.info("Grabbed {} frame size {}".format(grabbed, frame.size))

                if not grabbed:
                    time.sleep(1/30)
                    continue
                else:
                    self.frame_org = np.copy(frame)

                if self.runInference == 1:
                    # Run Inference
                    frame = self.inference_engine.inference(frame)

                if self._annotate:
                    fps_annotation = 'FPS : {}'.format(self.currentFps)
                    cv2.putText( frame, fps_annotation, (10, textY + 10), cv2.FONT_HERSHEY_SIMPLEX, self._fontScale, (0,0,255), 2)

                self.set_display_frame(frame)
                self.currentFps = self.fps.fps(self._playback_sync)

            except Exception as ex:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                traceback.print_exception(exc_type, exc_obj, exc_tb)
                logging.error('!! {0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex))

        if self.verbose:
            logging.info('<< {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
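
    # A minimal sketch of driving the frame loop from asyncio via
    # process_video_frame_async() (hypothetical name: `processor` is an
    # instance of this class):
    #
    #   import asyncio
    #   from concurrent.futures import ThreadPoolExecutor
    #
    #   async def main(processor):
    #       with ThreadPoolExecutor(max_workers=1) as executor:
    #           await processor.process_video_frame_async(executor)
    #
    #   asyncio.run(main(processor))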
Example #20
class HandTracker:
    def __init__(self, input_file=None,
                pd_path="models/palm_detection.blob", 
                pd_score_thresh=0.5, pd_nms_thresh=0.3,
                use_lm=True,
                lm_path="models/hand_landmark.blob",
                lm_score_threshold=0.5,
                use_gesture=False):

        self.camera = input_file is None
        self.pd_path = pd_path
        self.pd_score_thresh = pd_score_thresh
        self.pd_nms_thresh = pd_nms_thresh
        self.use_lm = use_lm
        self.lm_path = lm_path
        self.lm_score_threshold = lm_score_threshold
        self.use_gesture = use_gesture
        
        if not self.camera:
            if input_file.endswith('.jpg') or input_file.endswith('.png') :
                self.image_mode = True
                self.img = cv2.imread(input_file)
                self.video_size = np.min(self.img.shape[:2])
            else:
                self.image_mode = False
                self.cap = cv2.VideoCapture(input_file)
                width  = self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)   # float `width`
                height = self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float `height`
                self.video_size = int(min(width, height))
        
        # Create SSD anchors 
        # https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt
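        # With these options generate_anchors() yields the 896 anchors the palm
        # detection model expects (matching the "classificators"/"regressors"
        # output sizes decoded in pd_postprocess() below)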
        anchor_options = mpu.SSDAnchorOptions(num_layers=4, 
                                min_scale=0.1484375,
                                max_scale=0.75,
                                input_size_height=128,
                                input_size_width=128,
                                anchor_offset_x=0.5,
                                anchor_offset_y=0.5,
                                strides=[8, 16, 16, 16],
                                aspect_ratios= [1.0],
                                reduce_boxes_in_lowest_layer=False,
                                interpolated_scale_aspect_ratio=1.0,
                                fixed_anchor_size=True)
        self.anchors = mpu.generate_anchors(anchor_options)
        self.nb_anchors = self.anchors.shape[0]
        print(f"{self.nb_anchors} anchors have been created")

        # Rendering flags
        if self.use_lm:
            self.show_pd_box = False
            self.show_pd_kps = False
            self.show_rot_rect = False
            self.show_handedness = False
            self.show_landmarks = True
            self.show_scores = False
            self.show_gesture = self.use_gesture
        else:
            self.show_pd_box = True
            self.show_pd_kps = False
            self.show_rot_rect = False
            self.show_scores = False
        

    def create_pipeline(self):
        print("Creating pipeline...")
        # Start defining a pipeline
        pipeline = dai.Pipeline()
        self.pd_input_length = 128

        if self.camera:
            # ColorCamera
            print("Creating Color Camera...")
            cam = pipeline.createColorCamera()
            cam.setPreviewSize(self.pd_input_length, self.pd_input_length)
            cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
            # Crop video to square shape (palm detection takes square image as input)
            self.video_size = min(cam.getVideoSize())
            cam.setVideoSize(self.video_size, self.video_size)
            cam.setFps(30)
            cam.setInterleaved(False)
            cam.setBoardSocket(dai.CameraBoardSocket.RGB)
            cam_out = pipeline.createXLinkOut()
            cam_out.setStreamName("cam_out")
            # Link video output to host for higher resolution
            cam.video.link(cam_out.input)

        # Define palm detection model
        print("Creating Palm Detection Neural Network...")
        pd_nn = pipeline.createNeuralNetwork()
        pd_nn.setBlobPath(str(Path(self.pd_path).resolve().absolute()))
        # Increase threads for detection
        # pd_nn.setNumInferenceThreads(2)
        # Specify that network takes latest arriving frame in non-blocking manner
        # Palm detection input                 
        if self.camera:
            pd_nn.input.setQueueSize(1)
            pd_nn.input.setBlocking(False)
            cam.preview.link(pd_nn.input)
        else:
            pd_in = pipeline.createXLinkIn()
            pd_in.setStreamName("pd_in")
            pd_in.out.link(pd_nn.input)
        # Palm detection output
        pd_out = pipeline.createXLinkOut()
        pd_out.setStreamName("pd_out")
        pd_nn.out.link(pd_out.input)
        

        # Define hand landmark model
        if self.use_lm:
            print("Creating Hand Landmark Neural Network...")          
            lm_nn = pipeline.createNeuralNetwork()
            lm_nn.setBlobPath(str(Path(self.lm_path).resolve().absolute()))
            lm_nn.setNumInferenceThreads(1)
            # Hand landmark input
            self.lm_input_length = 224
            lm_in = pipeline.createXLinkIn()
            lm_in.setStreamName("lm_in")
            lm_in.out.link(lm_nn.input)
            # Hand landmark output
            lm_out = pipeline.createXLinkOut()
            lm_out.setStreamName("lm_out")
            lm_nn.out.link(lm_out.input)
            
        print("Pipeline created.")
        return pipeline        

    
    def pd_postprocess(self, inference):
        scores = np.array(inference.getLayerFp16("classificators"), dtype=np.float16) # 896
        bboxes = np.array(inference.getLayerFp16("regressors"), dtype=np.float16).reshape((self.nb_anchors,18)) # 896x18
        # Decode bboxes
        self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes, self.anchors)
        # Non maximum suppression
        self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh)
        if self.use_lm:
            mpu.detections_to_rect(self.regions)
            mpu.rect_transformation(self.regions, self.video_size, self.video_size)

    def pd_render(self, frame):
        for r in self.regions:
            if self.show_pd_box:
                box = (np.array(r.pd_box) * self.video_size).astype(int)
                cv2.rectangle(frame, (box[0], box[1]), (box[0]+box[2], box[1]+box[3]), (0,255,0), 2)
            if self.show_pd_kps:
                for i,kp in enumerate(r.pd_kps):
                    x = int(kp[0] * self.video_size)
                    y = int(kp[1] * self.video_size)
                    cv2.circle(frame, (x, y), 6, (0,0,255), -1)
                    cv2.putText(frame, str(i), (x, y+12), cv2.FONT_HERSHEY_PLAIN, 1.5, (0,255,0), 2)
            if self.show_scores:
                cv2.putText(frame, f"Palm score: {r.pd_score:.2f}", 
                        (int(r.pd_box[0] * self.video_size+10), int((r.pd_box[1]+r.pd_box[3])*self.video_size+60)), 
                        cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2)

    def recognize_gesture(self, r):           

        # Finger states
        # state: -1=unknown, 0=close, 1=open
        d_3_5 = mpu.distance(r.landmarks[3], r.landmarks[5])
        d_2_3 = mpu.distance(r.landmarks[2], r.landmarks[3])
        angle0 = mpu.angle(r.landmarks[0], r.landmarks[1], r.landmarks[2])
        angle1 = mpu.angle(r.landmarks[1], r.landmarks[2], r.landmarks[3])
        angle2 = mpu.angle(r.landmarks[2], r.landmarks[3], r.landmarks[4])
        r.thumb_angle = angle0+angle1+angle2
        if angle0+angle1+angle2 > 460 and d_3_5 / d_2_3 > 1.2: 
            r.thumb_state = 1
        else:
            r.thumb_state = 0
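
        # For the four other fingers, "open" means the fingertip is above the two
        # joints below it in image coordinates (y grows downward), e.g. index tip
        # (landmark 8) above landmarks 7 and 6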

        if r.landmarks[8][1] < r.landmarks[7][1] < r.landmarks[6][1]:
            r.index_state = 1
        elif r.landmarks[6][1] < r.landmarks[8][1]:
            r.index_state = 0
        else:
            r.index_state = -1

        if r.landmarks[12][1] < r.landmarks[11][1] < r.landmarks[10][1]:
            r.middle_state = 1
        elif r.landmarks[10][1] < r.landmarks[12][1]:
            r.middle_state = 0
        else:
            r.middle_state = -1

        if r.landmarks[16][1] < r.landmarks[15][1] < r.landmarks[14][1]:
            r.ring_state = 1
        elif r.landmarks[14][1] < r.landmarks[16][1]:
            r.ring_state = 0
        else:
            r.ring_state = -1

        if r.landmarks[20][1] < r.landmarks[19][1] < r.landmarks[18][1]:
            r.little_state = 1
        elif r.landmarks[18][1] < r.landmarks[20][1]:
            r.little_state = 0
        else:
            r.little_state = -1

        # Gesture
        if r.thumb_state == 1 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 1 and r.little_state == 1:
            r.gesture = "FIVE"
        elif r.thumb_state == 0 and r.index_state == 0 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0:
            r.gesture = "FIST"
        elif r.thumb_state == 1 and r.index_state == 0 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0:
            r.gesture = "OK" 
        elif r.thumb_state == 0 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 0 and r.little_state == 0:
            r.gesture = "PEACE"
        elif r.thumb_state == 0 and r.index_state == 1 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0:
            r.gesture = "ONE"
        elif r.thumb_state == 1 and r.index_state == 1 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0:
            r.gesture = "TWO"
        elif r.thumb_state == 1 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 0 and r.little_state == 0:
            r.gesture = "THREE"
        elif r.thumb_state == 0 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 1 and r.little_state == 1:
            r.gesture = "FOUR"
        else:
            r.gesture = None
            
    def lm_postprocess(self, region, inference):
        region.lm_score = inference.getLayerFp16("Identity_1")[0]    
        region.handedness = inference.getLayerFp16("Identity_2")[0]
        lm_raw = np.array(inference.getLayerFp16("Identity_dense/BiasAdd/Add"))
        
        lm = []
        for i in range(int(len(lm_raw)/3)):
            # x,y,z -> x/w,y/h,z/w (here h=w)
            lm.append(lm_raw[3*i:3*(i+1)]/self.lm_input_length)
        region.landmarks = lm
        if self.use_gesture: self.recognize_gesture(region)


    
    def lm_render(self, frame, region):
        if region.lm_score > self.lm_score_threshold:
            if self.show_rot_rect:
                cv2.polylines(frame, [np.array(region.rect_points)], True, (0,255,255), 2, cv2.LINE_AA)
            if self.show_landmarks:
                src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32)
                dst = np.array([ (x, y) for x,y in region.rect_points[1:]], dtype=np.float32) # region.rect_points[0] is left bottom point !
                mat = cv2.getAffineTransform(src, dst)
                lm_xy = np.expand_dims(np.array([(l[0], l[1]) for l in region.landmarks]), axis=0)
                lm_xy = np.squeeze(cv2.transform(lm_xy, mat)).astype(int)  # np.int was removed in NumPy 1.24
                list_connections = [[0, 1, 2, 3, 4], 
                                    [0, 5, 6, 7, 8], 
                                    [5, 9, 10, 11, 12],
                                    [9, 13, 14 , 15, 16],
                                    [13, 17],
                                    [0, 17, 18, 19, 20]]
                lines = [np.array([lm_xy[point] for point in line]) for line in list_connections]
                cv2.polylines(frame, lines, False, (255, 0, 0), 2, cv2.LINE_AA)
                if self.use_gesture:
                    # color depending on finger state (1=open, 0=close, -1=unknown)
                    color = { 1: (0,255,0), 0: (0,0,255), -1:(0,255,255)}
                    radius = 6
                    cv2.circle(frame, (lm_xy[0][0], lm_xy[0][1]), radius, color[-1], -1)
                    for i in range(1,5):
                        cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.thumb_state], -1)
                    for i in range(5,9):
                        cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.index_state], -1)
                    for i in range(9,13):
                        cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.middle_state], -1)
                    for i in range(13,17):
                        cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.ring_state], -1)
                    for i in range(17,21):
                        cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.little_state], -1)
                else:
                    for x,y in lm_xy:
                        cv2.circle(frame, (x, y), 6, (0,128,255), -1)
            if self.show_handedness:
                cv2.putText(frame, f"RIGHT {region.handedness:.2f}" if region.handedness > 0.5 else f"LEFT {1-region.handedness:.2f}", 
                        (int(region.pd_box[0] * self.video_size+10), int((region.pd_box[1]+region.pd_box[3])*self.video_size+20)), 
                        cv2.FONT_HERSHEY_PLAIN, 2, (0,255,0) if region.handedness > 0.5 else (0,0,255), 2)
            if self.show_scores:
                cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}", 
                        (int(region.pd_box[0] * self.video_size+10), int((region.pd_box[1]+region.pd_box[3])*self.video_size+90)), 
                        cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2)
            if self.use_gesture and self.show_gesture:
                cv2.putText(frame, region.gesture, (int(region.pd_box[0]*self.video_size+10), int(region.pd_box[1]*self.video_size-50)), 
                        cv2.FONT_HERSHEY_PLAIN, 3, (255,255,255), 3)

        

    def run(self):

        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        # Define data queues 
        if self.camera:
            q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False)
                q_lm_in = device.getInputQueue(name="lm_in")
        else:
            q_pd_in = device.getInputQueue(name="pd_in")
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
                q_lm_in = device.getInputQueue(name="lm_in")

        self.fps = FPS(mean_nb_frames=20)

        seq_num = 0
        nb_pd_inferences = 0
        nb_lm_inferences = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        while True:
            self.fps.update()
            if self.camera:
                in_video = q_video.get()
                video_frame = in_video.getCvFrame()
            else:
                if self.image_mode:
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                dx = (w - self.video_size) // 2
                dy = (h - self.video_size) // 2
                video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size]
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(seq_num)
                frame_nn.setWidth(self.pd_input_length)
                frame_nn.setHeight(self.pd_input_length)
                frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
                q_pd_in.send(frame_nn)
                pd_rtrip_time = now()

                seq_num += 1

            annotated_frame = video_frame.copy()

            # Get palm detection
            inference = q_pd_out.get()
            if not self.camera: glob_pd_rtrip_time += now() - pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1

            # Hand landmarks
            if self.use_lm:
                for i,r in enumerate(self.regions):
                    img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
                    nn_data = dai.NNData()   
                    nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
                    q_lm_in.send(nn_data)
                    if i == 0: lm_rtrip_time = now() # We measure only for the first region
                
                # Retrieve hand landmarks
                for i,r in enumerate(self.regions):
                    inference = q_lm_out.get()
                    if i == 0: glob_lm_rtrip_time += now() - lm_rtrip_time
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
                    nb_lm_inferences += 1

                
            self.fps.display(annotated_frame, orig=(50,50),color=(240,180,100))
            cv2.imshow("video", annotated_frame)

            key = cv2.waitKey(1) 
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_handedness = not self.show_handedness
            elif key == ord('6'):
                self.show_scores = not self.show_scores
            elif key == ord('7'):
                self.show_gesture = not self.show_gesture

        # Print some stats
        if not self.camera:
            print(f"# video files frames                 : {seq_num}")
            print(f"# palm detection inferences received : {nb_pd_inferences}")
            print(f"# hand landmark inferences received  : {nb_lm_inferences}")
            print(f"Palm detection round trip            : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
            print(f"Hand landmark round trip             : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
Example #22
import cv2

# CVCapIN (threaded capture wrapper) and OpticalFlow are assumed to be local
# project modules; the exact import paths are assumptions
import CVCapIN
from OpticalFlow import OpticalFlow

from FPS import FPS


# drive_data = DataControl()
# drive_data.start()
# drive_data.set("tfl", "red")
# objd = OBJDetection(drive_data)
# objd.load()
# laned = LANEDetection(drive_data)

cap = CVCapIN.CVCapIN(id_c=2)
cap.start()

# drive = Drive()
optf = OpticalFlow()
fpser = FPS()
fpser.start()
while cv2.waitKey(1) != ord("q"):
    fpser.update()  # assumed: keep the FPS counter in step with the loop

    # drive_data.set("tfl", "red")
    _, frame = cap.read()
    # cv2.imshow("ddd", frame)
    # laned_img = laned.run(frame.copy())
    # cv2.imshow("laned", laned_img)
    # objd_img = objd.run(frame.copy())
    # cv2.imshow("objd", objd_img)

    # drive.run()

    # print(str(drive_data))
    optf_img = optf.run(frame)
    cv2.imshow("optf", optf_img)  # assumed: display the result so waitKey() has a window to poll
class BlazeposeDepthai:
    def __init__(self,
                 input_src=None,
                 pd_path=POSE_DETECTION_MODEL,
                 pd_score_thresh=0.5,
                 pd_nms_thresh=0.3,
                 lm_path=FULL_BODY_LANDMARK_MODEL,
                 lm_score_threshold=0.7,
                 full_body=True,
                 use_gesture=False,
                 smoothing=True,
                 filter_window_size=5,
                 filter_velocity_scale=10,
                 show_3d=False,
                 crop=False,
                 multi_detection=False,
                 output=None,
                 internal_fps=15):

        self.pd_path = pd_path
        self.pd_score_thresh = pd_score_thresh
        self.pd_nms_thresh = pd_nms_thresh
        self.lm_path = lm_path
        self.lm_score_threshold = lm_score_threshold
        self.full_body = full_body
        self.use_gesture = use_gesture
        self.smoothing = smoothing
        self.show_3d = show_3d
        self.crop = crop
        self.multi_detection = multi_detection
        if self.multi_detection:
            print("With multi-detection, smoothing filter is disabled.")
            self.smoothing = False
        self.internal_fps = internal_fps

        if input_src is None:
            self.input_type = "internal"  # OAK* internal color camera
            self.video_fps = internal_fps  # Used when saving the output in a video file. Should be close to the real fps
            video_height = video_width = 1080  # Depends on cam.setResolution() in create_pipeline()
        elif input_src.endswith('.jpg') or input_src.endswith('.png'):
            self.input_type = "image"
            self.img = cv2.imread(input_src)
            self.video_fps = 25
            video_height, video_width = self.img.shape[:2]
        else:
            self.input_type = "video"
            if input_src.isdigit():
                input_type = "webcam"
                input_src = int(input_src)
            self.cap = cv2.VideoCapture(input_src)
            self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS))
            video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            print("Video FPS:", self.video_fps)

        self.nb_kps = 33 if self.full_body else 25

        if self.smoothing:
            self.filter = mpu.LandmarksSmoothingFilter(filter_window_size,
                                                       filter_velocity_scale,
                                                       (self.nb_kps, 3))

        # Create SSD anchors
        # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt
        anchor_options = mpu.SSDAnchorOptions(
            num_layers=4,
            min_scale=0.1484375,
            max_scale=0.75,
            input_size_height=128,
            input_size_width=128,
            anchor_offset_x=0.5,
            anchor_offset_y=0.5,
            strides=[8, 16, 16, 16],
            aspect_ratios=[1.0],
            reduce_boxes_in_lowest_layer=False,
            interpolated_scale_aspect_ratio=1.0,
            fixed_anchor_size=True)
        self.anchors = mpu.generate_anchors(anchor_options)
        self.nb_anchors = self.anchors.shape[0]
        print(f"{self.nb_anchors} anchors have been created")

        # Rendering flags
        self.show_pd_box = False
        self.show_pd_kps = False
        self.show_rot_rect = False
        self.show_landmarks = True
        self.show_scores = False
        self.show_gesture = self.use_gesture
        self.show_fps = True

        if self.show_3d:
            self.vis3d = o3d.visualization.Visualizer()
            self.vis3d.create_window()
            opt = self.vis3d.get_render_option()
            opt.background_color = np.asarray([0, 0, 0])
            z = min(video_height, video_width) / 3
            self.grid_floor = create_grid([0, video_height, -z],
                                          [video_width, video_height, -z],
                                          [video_width, video_height, z],
                                          [0, video_height, z],
                                          5,
                                          2,
                                          color=(1, 1, 1))
            self.grid_wall = create_grid([0, 0, z], [video_width, 0, z],
                                         [video_width, video_height, z],
                                         [0, video_height, z],
                                         5,
                                         2,
                                         color=(1, 1, 1))
            self.vis3d.add_geometry(self.grid_floor)
            self.vis3d.add_geometry(self.grid_wall)
            view_control = self.vis3d.get_view_control()
            view_control.set_up(np.array([0, -1, 0]))
            view_control.set_front(np.array([0, 0, -1]))

        if output is None:
            self.output = None
        else:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            self.output = cv2.VideoWriter(output, fourcc, self.video_fps,
                                          (video_width, video_height))

    def create_pipeline(self):
        print("Creating pipeline...")
        # Start defining a pipeline
        pipeline = dai.Pipeline()
        pipeline.setOpenVINOVersion(
            version=dai.OpenVINO.Version.VERSION_2021_2)
        self.pd_input_length = 128

        if self.input_type == "internal":
            # ColorCamera
            print("Creating Color Camera...")
            cam = pipeline.createColorCamera()
            cam.setPreviewSize(self.pd_input_length, self.pd_input_length)
            cam.setResolution(
                dai.ColorCameraProperties.SensorResolution.THE_1080_P)
            # Crop video to square shape (palm detection takes square image as input)
            self.video_size = min(cam.getVideoSize())
            cam.setVideoSize(self.video_size, self.video_size)
            #
            cam.setFps(self.internal_fps)
            cam.setInterleaved(False)
            cam.setBoardSocket(dai.CameraBoardSocket.RGB)
            cam_out = pipeline.createXLinkOut()
            cam_out.setStreamName("cam_out")
            # Link video output to host for higher resolution
            cam.video.link(cam_out.input)

        # Define pose detection model
        print("Creating Pose Detection Neural Network...")
        pd_nn = pipeline.createNeuralNetwork()
        pd_nn.setBlobPath(str(Path(self.pd_path).resolve().absolute()))
        # Increase threads for detection
        # pd_nn.setNumInferenceThreads(2)
        # Specify that network takes latest arriving frame in non-blocking manner
        # Pose detection input
        if self.input_type == "internal":
            pd_nn.input.setQueueSize(1)
            pd_nn.input.setBlocking(False)
            cam.preview.link(pd_nn.input)
        else:
            pd_in = pipeline.createXLinkIn()
            pd_in.setStreamName("pd_in")
            pd_in.out.link(pd_nn.input)
        # Pose detection output
        pd_out = pipeline.createXLinkOut()
        pd_out.setStreamName("pd_out")
        pd_nn.out.link(pd_out.input)

        # Define landmark model
        print("Creating Landmark Neural Network...")
        lm_nn = pipeline.createNeuralNetwork()
        lm_nn.setBlobPath(str(Path(self.lm_path).resolve().absolute()))
        lm_nn.setNumInferenceThreads(1)
        # Landmark input
        self.lm_input_length = 256
        lm_in = pipeline.createXLinkIn()
        lm_in.setStreamName("lm_in")
        lm_in.out.link(lm_nn.input)
        # Landmark output
        lm_out = pipeline.createXLinkOut()
        lm_out.setStreamName("lm_out")
        lm_nn.out.link(lm_out.input)

        print("Pipeline created.")
        return pipeline

    def pd_postprocess(self, inference):
        scores = np.array(inference.getLayerFp16("classificators"),
                          dtype=np.float16)  # 896
        bboxes = np.array(inference.getLayerFp16("regressors"),
                          dtype=np.float16).reshape(
                              (self.nb_anchors, 12))  # 896x12

        # Decode bboxes
        self.regions = mpu.decode_bboxes(self.pd_score_thresh,
                                         scores,
                                         bboxes,
                                         self.anchors,
                                         best_only=not self.multi_detection)
        # Non maximum suppression (not needed if best_only is True)
        if self.multi_detection:
            self.regions = mpu.non_max_suppression(self.regions,
                                                   self.pd_nms_thresh)

        mpu.detections_to_rect(self.regions,
                               kp_pair=[0, 1] if self.full_body else [2, 3])
        mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)

    def pd_render(self, frame):
        for r in self.regions:
            if self.show_pd_box:
                box = (np.array(r.pd_box) * self.frame_size).astype(int)
                cv2.rectangle(frame, (box[0], box[1]),
                              (box[0] + box[2], box[1] + box[3]), (0, 255, 0),
                              2)
            if self.show_pd_kps:
                # Key point 0 - mid hip center
                # Key point 1 - point that encodes size & rotation (for full body)
                # Key point 2 - mid shoulder center
                # Key point 3 - point that encodes size & rotation (for upper body)
                if self.full_body:
                    # Only kp 0 and 1 used
                    list_kps = [0, 1]
                else:
                    # Only kp 2 and 3 used for upper body
                    list_kps = [2, 3]
                for kp in list_kps:
                    x = int(r.pd_kps[kp][0] * self.frame_size)
                    y = int(r.pd_kps[kp][1] * self.frame_size)
                    cv2.circle(frame, (x, y), 3, (0, 0, 255), -1)
                    cv2.putText(frame, str(kp), (x, y + 12),
                                cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2)
            if self.show_scores:
                cv2.putText(
                    frame, f"Pose score: {r.pd_score:.2f}",
                    (int(r.pd_box[0] * self.frame_size + 10),
                     int((r.pd_box[1] + r.pd_box[3]) * self.frame_size + 60)),
                    cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)

    def lm_postprocess(self, region, inference):
        region.lm_score = inference.getLayerFp16("output_poseflag")[0]
        if region.lm_score > self.lm_score_threshold:
            self.nb_active_regions += 1

            lm_raw = np.array(inference.getLayerFp16("ld_3d")).reshape(-1, 5)
            # Each keypoint has 5 values:
            # - X,Y coordinates are local to the region of
            # interest and range from [0.0, 255.0].
            # - Z coordinate is measured in "image pixels" like
            # the X and Y coordinates and represents the
            # distance relative to the plane of the subject's
            # hips, which is the origin of the Z axis. Negative
            # values are between the hips and the camera;
            # positive values are behind the hips. The Z scale is
            # similar to the X, Y scales, but it is of a different
            # nature, since it is obtained not via human annotation
            # but by fitting synthetic data (the GHUM model) to the
            # 2D annotations.
            # - Visibility, after a user-applied sigmoid, denotes the
            # probability that a keypoint is located within the
            # frame and not occluded by another, bigger body
            # part or another object.
            # - Presence, after a user-applied sigmoid, denotes the
            # probability that a keypoint is located within the
            # frame.

            # Normalize x,y,z. Scaling in z = scaling in x = 1/self.lm_input_length
            lm_raw[:, :3] /= self.lm_input_length
            # Apply sigmoid on visibility and presence (if used later)
            # lm_raw[:,3:5] = 1 / (1 + np.exp(-lm_raw[:,3:5]))
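            # e.g. a raw visibility score of 2.0 would map to 1 / (1 + e^-2) ≈ 0.88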

            # region.landmarks contains the landmarks normalized 3D coordinates in the relative oriented body bounding box
            region.landmarks = lm_raw[:, :3]
            # Calculate the landmark coordinate in square padded image (region.landmarks_padded)
            src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32)
            dst = np.array(
                [(x, y) for x, y in region.rect_points[1:]], dtype=np.float32
            )  # region.rect_points[0] is left bottom point and points going clockwise!
            mat = cv2.getAffineTransform(src, dst)
            lm_xy = np.expand_dims(region.landmarks[:self.nb_kps, :2], axis=0)
            lm_xy = np.squeeze(cv2.transform(lm_xy, mat))
            # A segment of length 1 in the coordinates system of body bounding box takes region.rect_w_a pixels in the
            # original image. Then we arbitrarily divide by 4 for a more realistic appearance.
            lm_z = region.landmarks[:self.nb_kps, 2:3] * region.rect_w_a / 4
            lm_xyz = np.hstack((lm_xy, lm_z))
            if self.smoothing:
                lm_xyz = self.filter.apply(lm_xyz)
            region.landmarks_padded = lm_xyz.astype(int)  # np.int was removed in NumPy 1.24
            # If we added padding to make the image square, we need to remove this padding from landmark coordinates
            # region.landmarks_abs contains absolute landmark coordinates in the original image (padding removed))
            region.landmarks_abs = region.landmarks_padded.copy()
            if self.pad_h > 0:
                region.landmarks_abs[:, 1] -= self.pad_h
            if self.pad_w > 0:
                region.landmarks_abs[:, 0] -= self.pad_w

            if self.use_gesture: self.recognize_gesture(region)

    def lm_render(self, frame, region):
        if region.lm_score > self.lm_score_threshold:
            if self.show_rot_rect:
                cv2.polylines(frame, [np.array(region.rect_points)], True,
                              (0, 255, 255), 2, cv2.LINE_AA)
            if self.show_landmarks:

                list_connections = LINES_FULL_BODY if self.full_body else LINES_UPPER_BODY
                lines = [
                    np.array(
                        [region.landmarks_padded[point, :2] for point in line])
                    for line in list_connections
                ]
                cv2.polylines(frame, lines, False, (255, 180, 90), 2,
                              cv2.LINE_AA)

                for i, x_y in enumerate(region.landmarks_padded[:, :2]):
                    if i > 10:
                        color = (0, 255, 0) if i % 2 == 0 else (0, 0, 255)
                    elif i == 0:
                        color = (0, 255, 255)
                    elif i in [4, 5, 6, 8, 10]:
                        color = (0, 255, 0)
                    else:
                        color = (0, 0, 255)
                    cv2.circle(frame, (x_y[0], x_y[1]), 4, color, -1)  # negative thickness draws a filled circle

                if self.show_3d:
                    points = region.landmarks_abs
                    lines = LINE_MESH_FULL_BODY if self.full_body else LINE_MESH_UPPER_BODY
                    colors = COLORS_FULL_BODY
                    for i, a_b in enumerate(lines):
                        a, b = a_b
                        line = create_segment(points[a],
                                              points[b],
                                              radius=5,
                                              color=colors[i])
                        if line:
                            self.vis3d.add_geometry(line,
                                                    reset_bounding_box=False)

            if self.show_scores:
                cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}",
                            (int(region.pd_box[0] * self.frame_size + 10),
                             int((region.pd_box[1] + region.pd_box[3]) *
                                 self.frame_size + 90)),
                            cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
            if self.use_gesture and self.show_gesture:
                cv2.putText(frame, region.gesture,
                            (int(region.pd_box[0] * self.frame_size + 10),
                             int(region.pd_box[1] * self.frame_size - 50)),
                            cv2.FONT_HERSHEY_PLAIN, 5, (0, 190, 255), 3)  # BGR values must be in [0, 255]

    def recognize_gesture(self, r):
        def angle_with_y(v):
            # v: 2d vector (x, y)
            # Returns the angle in degrees of v with the y-axis of the image plane
            if v[1] == 0:
                return 90
            angle = atan2(v[0], v[1])
            return np.degrees(angle)

        # For the demo, we want to recognize the flag semaphore alphabet
        # For this task, we just need to measure the angles of both arms with vertical
        right_arm_angle = angle_with_y(r.landmarks_abs[14, :2] -
                                       r.landmarks_abs[12, :2])
        left_arm_angle = angle_with_y(r.landmarks_abs[13, :2] -
                                      r.landmarks_abs[11, :2])
        right_pose = int((right_arm_angle + 202.5) / 45)
        left_pose = int((left_arm_angle + 202.5) / 45)
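        # Each arm angle (in [-180°, 180°]) is shifted by 202.5° and quantized into
        # 45° sectors, e.g. an arm hanging straight down (angle 0°) falls in sector
        # int(202.5 / 45) == 4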
        r.gesture = semaphore_flag.get((right_pose, left_pose), None)

    def run(self):

        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        # Define data queues
        if self.input_type == "internal":
            q_video = device.getOutputQueue(name="cam_out",
                                            maxSize=1,
                                            blocking=False)
            q_pd_out = device.getOutputQueue(name="pd_out",
                                             maxSize=1,
                                             blocking=False)
            q_lm_out = device.getOutputQueue(name="lm_out",
                                             maxSize=2,
                                             blocking=False)
            q_lm_in = device.getInputQueue(name="lm_in")
        else:
            q_pd_in = device.getInputQueue(name="pd_in")
            q_pd_out = device.getOutputQueue(name="pd_out",
                                             maxSize=4,
                                             blocking=True)
            q_lm_out = device.getOutputQueue(name="lm_out",
                                             maxSize=4,
                                             blocking=True)
            q_lm_in = device.getInputQueue(name="lm_in")

        self.fps = FPS(mean_nb_frames=20)

        seq_num = 0
        nb_pd_inferences = 0
        nb_lm_inferences = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        while True:
            self.fps.update()

            if self.input_type == "internal":
                in_video = q_video.get()
                video_frame = in_video.getCvFrame()
                self.frame_size = video_frame.shape[
                    0]  # The image is square cropped on the device
                self.pad_w = self.pad_h = 0
            else:
                if self.input_type == "image":
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break

                h, w = vid_frame.shape[:2]
                if self.crop:
                    # Cropping the long side to get a square shape
                    self.frame_size = min(h, w)
                    dx = (w - self.frame_size) // 2
                    dy = (h - self.frame_size) // 2
                    video_frame = vid_frame[dy:dy + self.frame_size,
                                            dx:dx + self.frame_size]
                else:
                    # Padding on the small side to get a square shape
                    self.frame_size = max(h, w)
                    self.pad_h = int((self.frame_size - h) / 2)
                    self.pad_w = int((self.frame_size - w) / 2)
                    video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h,
                                                     self.pad_h, self.pad_w,
                                                     self.pad_w,
                                                     cv2.BORDER_CONSTANT)

                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(seq_num)
                frame_nn.setWidth(self.pd_input_length)
                frame_nn.setHeight(self.pd_input_length)
                frame_nn.setData(
                    to_planar(video_frame,
                              (self.pd_input_length, self.pd_input_length)))
                pd_rtrip_time = now()
                q_pd_in.send(frame_nn)

                seq_num += 1

            annotated_frame = video_frame.copy()

            # Get pose detection
            inference = q_pd_out.get()
            if self.input_type != "internal":
                pd_rtrip_time = now() - pd_rtrip_time
                glob_pd_rtrip_time += pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1

            # Landmarks
            self.nb_active_regions = 0
            if self.show_3d:
                self.vis3d.clear_geometries()
                self.vis3d.add_geometry(self.grid_floor,
                                        reset_bounding_box=False)
                self.vis3d.add_geometry(self.grid_wall,
                                        reset_bounding_box=False)
            for i, r in enumerate(self.regions):
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_input_length,
                                             self.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer(
                    "input_1",
                    to_planar(frame_nn,
                              (self.lm_input_length, self.lm_input_length)))
                if i == 0:
                    lm_rtrip_time = now(
                    )  # We measure only for the first region
                q_lm_in.send(nn_data)

                # Get landmarks
                inference = q_lm_out.get()
                if i == 0:
                    lm_rtrip_time = now() - lm_rtrip_time
                    glob_lm_rtrip_time += lm_rtrip_time
                    nb_lm_inferences += 1
                self.lm_postprocess(r, inference)
                self.lm_render(annotated_frame, r)
            if self.show_3d:
                self.vis3d.poll_events()
                self.vis3d.update_renderer()
            if self.smoothing and self.nb_active_regions == 0:
                self.filter.reset()

            if self.input_type != "internal" and not self.crop:
                annotated_frame = annotated_frame[self.pad_h:self.pad_h + h,
                                                  self.pad_w:self.pad_w + w]

            if self.show_fps:
                self.fps.display(annotated_frame,
                                 orig=(50, 50),
                                 size=1,
                                 color=(240, 180, 100))
            cv2.imshow("Blazepose", annotated_frame)

            if self.output:
                self.output.write(annotated_frame)

            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_scores = not self.show_scores
            elif key == ord('6'):
                self.show_gesture = not self.show_gesture
            elif key == ord('f'):
                self.show_fps = not self.show_fps

        # Print some stats
        print(f"# pose detection inferences : {nb_pd_inferences}")
        print(f"# landmark inferences       : {nb_lm_inferences}")
        if self.input_type != "internal" and nb_pd_inferences != 0:
            print(
                f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms"
            )
        if nb_lm_inferences != 0:
            print(
                f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms"
            )

        if self.output:
            self.output.release()
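
# Note: this and several snippets below rely on a to_planar() helper that
# converts an OpenCV HWC frame into the flattened planar (CHW) bytes expected
# by DepthAI's ImgFrame.setData(). A minimal sketch, assuming the usual
# resize-then-transpose implementation found in DepthAI samples:
import cv2

def to_planar(arr, shape):
    # Resize to the network input size, then reorder HWC -> CHW and flatten
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()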
Example #24
class MovenetDepthai:
    """
    Movenet body pose detector
    Arguments:
    - input_src: frame source,
                    - "rgb" or None: OAK* internal color camera,
                    - "rgb_laconic": same as "rgb" but without sending the frames to the host,
                    - a file path of an image or a video,
                    - an integer (eg 0) for a webcam id,
    - model: Movenet blob file,
                    - "thunder": the default thunder blob file (see variable MOVENET_THUNDER_MODEL),
                    - "lightning": the default lightning blob file (see variable MOVENET_LIGHTNING_MODEL),
                    - a path of a blob file. It is important that the filename contains
                    the string "thunder" or "lightning" to identify the type of the model.
    - score_thresh : confidence score to determine whether a keypoint prediction is reliable (a float between 0 and 1).
    - crop : boolean which indicates if square cropping is done or not
    - internal_fps : when using the internal color camera as input source, set its FPS to this value (calling setFps()).
    - internal_frame_height : when using the internal color camera, set the frame height (calling setIspScale()).
                                The width is calculated according to the height and depends on the value of 'crop'
    - stats : True or False, when True, display the global FPS when exiting.
    """
    def __init__(
        self,
        input_src="rgb",
        model=None,
        score_thresh=0.2,
        crop=False,
        internal_fps=None,
        internal_frame_height=640,
        stats=True,
    ):

        self.model = model

        if model == "lightning":
            self.model = str(MOVENET_LIGHTNING_MODEL)
            self.pd_input_length = 192
        elif model == "thunder":
            self.model = str(MOVENET_THUNDER_MODEL)
            self.pd_input_length = 256
        else:
            self.model = model
            if "lightning" in str(model):
                self.pd_input_length = 192
            else:  # Thunder
                self.pd_input_length = 256
        print(f"Using blob file : {self.model}")

        print(f"MoveNet input size : {self.pd_input_length}x{self.pd_input_length}x3")
        self.score_thresh = score_thresh

        self.crop = crop
        self.internal_fps = internal_fps
        self.stats = stats

        if input_src is None or input_src == "rgb" or input_src == "rgb_laconic":
            self.input_type = "rgb"  # OAK* internal color camera
            # In "laconic" mode, camera frames are not sent to the host
            self.laconic = input_src is not None and "laconic" in input_src
            if internal_fps is None:
                if "thunder" in str(model):
                    self.internal_fps = 12
                else:
                    self.internal_fps = 26
            else:
                self.internal_fps = internal_fps
            print(f"Internal camera FPS set to: {self.internal_fps}")

            self.video_fps = self.internal_fps  # Used when saving the output in a video file. Should be close to the real fps

            if self.crop:
                self.frame_size, self.scale_nd = find_isp_scale_params(
                    internal_frame_height)
                self.img_h = self.img_w = self.frame_size
            else:
                width, self.scale_nd = find_isp_scale_params(
                    internal_frame_height * 1920 / 1080, is_height=False)
                self.img_h = int(
                    round(1080 * self.scale_nd[0] / self.scale_nd[1]))
                self.img_w = int(
                    round(1920 * self.scale_nd[0] / self.scale_nd[1]))
                self.frame_size = self.img_w
            print(f"Internal camera image size: {self.img_w} x {self.img_h}")

        elif input_src.endswith(".jpg") or input_src.endswith(".png"):
            self.input_type = "image"
            self.img = cv2.imread(input_src)
            self.video_fps = 25
            self.img_h, self.img_w = self.img.shape[:2]
        else:
            self.input_type = "video"
            if input_src.isdigit():
                input_type = "webcam"
                input_src = int(input_src)
            self.cap = cv2.VideoCapture(input_src)
            self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS))
            self.img_w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.img_h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            print("Video FPS:", self.video_fps)

        # Defines the default crop region (pads the full image from both sides to make it a square image)
        # Used when the algorithm cannot reliably determine the crop region from the previous frame.
        box_size = max(self.img_w, self.img_h)
        x_min = (self.img_w - box_size) // 2
        y_min = (self.img_h - box_size) // 2
        self.init_crop_region = CropRegion(x_min, y_min, x_min + box_size,
                                           y_min + box_size, box_size)
        self.crop_region = self.init_crop_region

        self.device = dai.Device(self.create_pipeline())
        # self.device.startPipeline()
        print("Pipeline started")

        # Define data queues
        if self.input_type == "rgb":
            if not self.laconic:
                self.q_video = self.device.getOutputQueue(name="cam_out",
                                                          maxSize=1,
                                                          blocking=False)
            self.q_manip_cfg = self.device.getInputQueue(name="manip_cfg")
        else:
            self.q_pd_in = self.device.getInputQueue(name="pd_in")
        self.q_pd_out = self.device.getOutputQueue(name="pd_out",
                                                   maxSize=4,
                                                   blocking=False)

        self.q_ac_in = self.device.getInputQueue(name="ac_in")
        self.q_ac_out = self.device.getOutputQueue(name="ac_out",
                                                   maxSize=4,
                                                   blocking=False)

        self.fps = FPS()

        self.nb_frames = 0
        self.nb_pd_inferences = 0

    def create_pipeline(self):
        print("Creating pipeline...")
        # Start defining a pipeline
        pipeline = dai.Pipeline()
        pipeline.setOpenVINOVersion(dai.OpenVINO.Version.VERSION_2021_3)

        if self.input_type == "rgb":
            # ColorCamera
            print("Creating Color Camera...")
            # cam = pipeline.create(dai.node.ColorCamera)
            cam = pipeline.createColorCamera()
            cam.setResolution(
                dai.ColorCameraProperties.SensorResolution.THE_1080_P)
            cam.setInterleaved(False)
            cam.setIspScale(self.scale_nd[0], self.scale_nd[1])
            cam.setFps(self.internal_fps)
            cam.setBoardSocket(dai.CameraBoardSocket.RGB)
            cam.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
            if self.crop:
                cam.setVideoSize(self.frame_size, self.frame_size)
                cam.setPreviewSize(self.frame_size, self.frame_size)
            else:
                cam.setVideoSize(self.img_w, self.img_h)
                cam.setPreviewSize(self.img_w, self.img_h)

            if not self.laconic:
                cam_out = pipeline.createXLinkOut()
                # cam_out = pipeline.create(dai.node.XLinkOut)
                cam_out.setStreamName("cam_out")
                cam.video.link(cam_out.input)

            # ImageManip for cropping
            manip = pipeline.createImageManip()
            manip.setMaxOutputFrameSize(self.pd_input_length *
                                        self.pd_input_length * 3)
            manip.setWaitForConfigInput(True)
            manip.inputImage.setQueueSize(1)
            manip.inputImage.setBlocking(False)

            manip_cfg = pipeline.createXLinkIn()
            manip_cfg.setStreamName("manip_cfg")

            cam.preview.link(manip.inputImage)
            manip_cfg.out.link(manip.inputConfig)

        # Define pose detection model
        print("Creating Pose Detection Neural Network...")
        pd_nn = pipeline.createNeuralNetwork()
        # pd_nn = pipeline.create(dai.node.NeuralNetwork)
        pd_nn.setBlobPath(str(Path(self.model).resolve().absolute()))
        # pd_nn.input.setQueueSize(1)
        # Increase threads for detection
        # pd_nn.setNumInferenceThreads(2)
        if self.input_type == "rgb":
            manip.out.link(pd_nn.input)
        else:
            pd_in = pipeline.createXLinkIn()
            pd_in.setStreamName("pd_in")
            pd_in.out.link(pd_nn.input)

        # Define link to send pd result to host
        pd_out = pipeline.createXLinkOut()
        # pd_out = pipeline.create(dai.node.XLinkOut)
        pd_out.setStreamName("pd_out")

        pd_nn.out.link(pd_out.input)

        # Define action detection model
        print("Creating Action Detection Neural Network...")
        ac_nn = pipeline.createNeuralNetwork()
        ac_nn.setBlobPath(
            str(
                Path("models/action_openvino_2021.3_6shave.blob").resolve().
                absolute()))
        ac_in = pipeline.createXLinkIn()
        ac_in.setStreamName("ac_in")
        ac_in.out.link(ac_nn.input)

        # Define link to send pd result to host
        ac_out = pipeline.createXLinkOut()
        ac_out.setStreamName("ac_out")

        ac_nn.out.link(ac_out.input)

        print("Pipeline created.")

        return pipeline

    def crop_and_resize(self, frame, crop_region):
        """Crops and resize the image to prepare for the model input."""
        cropped = frame[
            max(0, crop_region.ymin):min(self.img_h, crop_region.ymax),
            max(0, crop_region.xmin):min(self.img_w, crop_region.xmax), ]

        if (crop_region.xmin < 0 or crop_region.xmax >= self.img_w
                or crop_region.ymin < 0 or crop_region.ymax >= self.img_h):
            # Padding is necessary
            cropped = cv2.copyMakeBorder(
                cropped,
                max(0, -crop_region.ymin),
                max(0, crop_region.ymax - self.img_h),
                max(0, -crop_region.xmin),
                max(0, crop_region.xmax - self.img_w),
                cv2.BORDER_CONSTANT,
            )

        cropped = cv2.resize(
            cropped,
            (self.pd_input_length, self.pd_input_length),
            interpolation=cv2.INTER_AREA,
        )
        return cropped

    def torso_visible(self, scores):
        """Checks whether there are enough torso keypoints.

        This function checks whether the model is confident at predicting one of the
        shoulders/hips which is required to determine a good crop region.
        """
        return (
            scores[KEYPOINT_DICT["left_hip"]] > self.score_thresh
            or scores[KEYPOINT_DICT["right_hip"]] > self.score_thresh) and (
                scores[KEYPOINT_DICT["left_shoulder"]] > self.score_thresh
                or scores[KEYPOINT_DICT["right_shoulder"]] > self.score_thresh)

    def determine_torso_and_body_range(self, keypoints, scores, center_x,
                                       center_y):
        """Calculates the maximum distance from each keypoints to the center location.

        The function returns the maximum distances from the two sets of keypoints:
        full 17 keypoints and 4 torso keypoints. The returned information will be
        used to determine the crop size. See determine_crop_region for more detail.
        """
        torso_joints = [
            "left_shoulder", "right_shoulder", "left_hip", "right_hip"
        ]
        max_torso_yrange = 0.0
        max_torso_xrange = 0.0
        for joint in torso_joints:
            dist_y = abs(center_y - keypoints[KEYPOINT_DICT[joint]][1])
            dist_x = abs(center_x - keypoints[KEYPOINT_DICT[joint]][0])
            if dist_y > max_torso_yrange:
                max_torso_yrange = dist_y
            if dist_x > max_torso_xrange:
                max_torso_xrange = dist_x

        max_body_yrange = 0.0
        max_body_xrange = 0.0
        for i in range(len(KEYPOINT_DICT)):
            if scores[i] < self.score_thresh:
                continue
            dist_y = abs(center_y - keypoints[i][1])
            dist_x = abs(center_x - keypoints[i][0])
            if dist_y > max_body_yrange:
                max_body_yrange = dist_y
            if dist_x > max_body_xrange:
                max_body_xrange = dist_x

        return [
            max_torso_yrange, max_torso_xrange, max_body_yrange,
            max_body_xrange
        ]

    def determine_crop_region(self, body):
        """Determines the region to crop the image for the model to run inference on.

        The algorithm uses the detected joints from the previous frame to estimate
        the square region that encloses the full body of the target person and is
        centered at the midpoint of the two hip joints. The crop size is determined
        by the distances between each joint and the center point.
        When the model is not confident with the four torso joint predictions, the
        function returns a default crop which is the full image padded to square.
        """
        if self.torso_visible(body.scores):
            center_x = (body.keypoints[KEYPOINT_DICT["left_hip"]][0] +
                        body.keypoints[KEYPOINT_DICT["right_hip"]][0]) // 2
            center_y = (body.keypoints[KEYPOINT_DICT["left_hip"]][1] +
                        body.keypoints[KEYPOINT_DICT["right_hip"]][1]) // 2
            (
                max_torso_yrange,
                max_torso_xrange,
                max_body_yrange,
                max_body_xrange,
            ) = self.determine_torso_and_body_range(body.keypoints,
                                                    body.scores, center_x,
                                                    center_y)
            crop_length_half = np.amax([
                max_torso_xrange * 1.9,
                max_torso_yrange * 1.9,
                max_body_yrange * 1.2,
                max_body_xrange * 1.2,
            ])
            tmp = np.array([
                center_x, self.img_w - center_x, center_y,
                self.img_h - center_y
            ])
            crop_length_half = int(
                round(np.amin([crop_length_half,
                               np.amax(tmp)])))
            crop_corner = [
                center_x - crop_length_half, center_y - crop_length_half
            ]

            if crop_length_half > max(self.img_w, self.img_h) / 2:
                return self.init_crop_region
            else:
                crop_length = crop_length_half * 2
                return CropRegion(
                    crop_corner[0],
                    crop_corner[1],
                    crop_corner[0] + crop_length,
                    crop_corner[1] + crop_length,
                    crop_length,
                )
        else:
            return self.init_crop_region

    def pd_postprocess(self, inference):

        # MoveNet outputs 17 keypoints, one [y, x, score] row per keypoint
        kps = np.array(inference.getLayerFp16("Identity")).reshape(-1, 3)  # 17x3
        body = Body(
            scores=kps[:, 2],
            keypoints_norm=kps[:, [1, 0]],
            score_thresh=self.score_thresh,
        )
        body.keypoints = (
            np.array([self.crop_region.xmin, self.crop_region.ymin]) +
            body.keypoints_norm * self.crop_region.size).astype(np.int32)
        body.crop_region = self.crop_region
        body.next_crop_region = self.determine_crop_region(body)
        return body

    def next_frame(self):

        self.fps.update()
        if self.input_type == "rgb":
            # Send cropping information to manip node on device
            cfg = dai.ImageManipConfig()
            points = [
                [self.crop_region.xmin, self.crop_region.ymin],
                [self.crop_region.xmax - 1, self.crop_region.ymin],
                [self.crop_region.xmax - 1, self.crop_region.ymax - 1],
                [self.crop_region.xmin, self.crop_region.ymax - 1],
            ]
            point2fList = []
            for p in points:
                pt = dai.Point2f()
                pt.x, pt.y = p[0], p[1]
                point2fList.append(pt)
            cfg.setWarpTransformFourPoints(point2fList, False)
            cfg.setResize(self.pd_input_length, self.pd_input_length)
            cfg.setFrameType(dai.ImgFrame.Type.RGB888p)
            self.q_manip_cfg.send(cfg)

            # Get the device camera frame if wanted
            if self.laconic:
                frame = np.zeros((self.frame_size, self.frame_size, 3),
                                 dtype=np.uint8)
            else:
                in_video = self.q_video.get()
                frame = in_video.getCvFrame()
        else:
            if self.input_type == "image":
                frame = self.img.copy()
            else:
                ok, frame = self.cap.read()
                if not ok:
                    return None, None

            # Cropping of the video frame
            cropped = self.crop_and_resize(frame, self.crop_region)
            cropped = cv2.cvtColor(cropped,
                                   cv2.COLOR_BGR2RGB).transpose(2, 0, 1)

            frame_nn = dai.ImgFrame()
            frame_nn.setTimestamp(time.monotonic())
            frame_nn.setWidth(self.pd_input_length)
            frame_nn.setHeight(self.pd_input_length)
            frame_nn.setData(cropped)
            self.q_pd_in.send(frame_nn)

        # Get result from device
        inference = self.q_pd_out.get()
        body = self.pd_postprocess(inference)
        self.crop_region = body.next_crop_region

        # Statistics
        if self.stats:
            self.nb_frames += 1
            self.nb_pd_inferences += 1

        return frame, body

    def exit(self):
        # Print some stats
        if self.stats:
            print(
                f"FPS : {self.fps.global_duration():.1f} f/s (# frames = {self.fps.nb_frames()})"
            )
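
# Usage sketch (hypothetical driver, not part of the original example): shows
# how the constructor arguments documented above combine with next_frame() and
# exit(). It assumes the module-level imports of this snippet (cv2 etc.);
# drawing of body.keypoints filtered by body.scores is left out for brevity.
if __name__ == "__main__":
    pose = MovenetDepthai(input_src="rgb", model="thunder", score_thresh=0.3)
    while True:
        frame, body = pose.next_frame()
        if frame is None:
            break
        cv2.imshow("MoveNet", frame)
        if cv2.waitKey(1) in (27, ord('q')):
            break
    pose.exit()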
Example #25
    def __init__(self, master):
        """
        Start the GUI and the asynchronous threads. We are in the main
        (original) thread of the application, which will later be used by
        the GUI as well.
        """
        self.master = master
        self.gps_destination_file_name = ""
        self.frames_destination_timestamp = ""
        self.video_destination_file_handler = cv2.VideoWriter()
        self.gps_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/GPS/"
        self.video_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/Videos/"
        self.record = False
        self.img = None
        self.frame = None
        self.frame1 = None
        self.f = 0
        self.check = False
        self.brahim = 1.0 / 25  # period (in seconds) passed to the GPS reader thread

        # p = Properties()
        # with open('config.properties', 'rb') as f:
        #     p.load(f, 'utf-8')

        # print(p["video_id"].data)                     #### incremtentaiton properties file
        # p["video_id"]=str(int(p["video_id"].data) +1)
        # print(p["video_id"].data)
        # with open('config.properties', 'wb') as f:
        #     p.store(f, encoding="utf-8")

        self.frame = None
        # Create the gps queue
        self.gps_queue = Queue()
        # principal thread
        self.running = 1
        self.cameras_state = self.find_cam()
        print(self.cameras_state)
        if self.cameras_state[0]:
            self.camera = See3Cam(src=self.cameras_state[1],
                                  width=1280,
                                  height=720,
                                  framerate=30,
                                  name="CAM")
            self.camera.start()
        if self.cameras_state[2]:
            self.camera1 = See3Cam(src=self.cameras_state[3],
                                   width=1280,
                                   height=720,
                                   framerate=30,
                                   name="CAM1")
            self.camera1.start()

        self.fps = FPS()

        # Set up Gps
        self.gps = GpsCapture()
        # Set up the GUI part
        self.gui = GuiPart(master, self, self.cameras_state, self.gps.running,
                           self.recordData, self.stopRecord)

        # Set up the thread for the GPS checking
        gps_controller = threading.Thread(target=self.checkGpsConnection,
                                          args=(2, ))
        gps_controller.daemon = True
        gps_controller.start()

        # Set up the thread for the camera checking
        camera_controller = threading.Thread(target=self.checkCameraConnection,
                                             args=(2, ))
        camera_controller.daemon = True
        camera_controller.start()

        # # Set up the thread for the video stream
        # camera_controller = threading.Thread(target=self.readCameraThread, args=(self.brahim,))
        # camera_controller.setDaemon(True)
        # camera_controller.start()

        # Set up the thread for the GPS stream (renamed so it no longer shadows
        # the camera thread variable above)
        gps_stream_controller = threading.Thread(target=self.readGpsThread,
                                                 args=(self.brahim, ))
        gps_stream_controller.daemon = True
        gps_stream_controller.start()

        # Set up the Thread for the data recording
        self.recordingThread = RecordingThreadController(
            self, self.video_destination_dir, self.gps_destination_dir,
            self.gui)

        self.video_output = True
        # Start the periodic call in the GUI .

        self.fps.start()
        self.periodicCall()
Example #26
import argparse
import time

import cv2
from imutils.video import FPS  # assumed source of the FPS counter used below

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-n",
                "--num-frames",
                type=int,
                default=100,
                help="# of frames to loop over for FPS test")
ap.add_argument("-d",
                "--display",
                type=int,
                default=1,
                help="Whether or not frames should be displayed")
args = vars(ap.parse_args())

time.sleep(2.0)
fps = FPS().start()

Ar = aruco(98)  # aruco: marker-tracking thread class defined elsewhere in this project
Ar.start()
st = time.time()
num = 0
# loop over some frames...this time using the threaded stream
while not Ar.stopped:
    time.sleep(.1)

fps.stop()
#print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
#print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
print(100 / (time.time() - st))
# do a bit of cleanup
cv2.destroyAllWindows()
Example #27
from picamera import PiCamera
import argparse
import time
import cv2

from imutils.video import FPS  # assumed source of the FPS counter used below

ap = argparse.ArgumentParser()
ap.add_argument("-n", "--num-frames", type=int, default=100, help="Number of frames for FPS test")
ap.add_argument("-d", "--display", type=int, default=3, help="Show frames or not")
args = vars(ap.parse_args())

# init the camera!

print("Sampling frames from threaded camera module...")
vs = piStream().start()  # piStream: threaded PiCamera wrapper defined elsewhere in this project
time.sleep(2.0)
fps = FPS().start()

while fps._numFrames < args["num_frames"]:
    # grab frame
    frame = vs.read()

    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

    fps.update()

fps.stop()
print("Elapsed time: {:.2f}".format(fps.elapsed()))
print("FPS: {:.2f}".format(fps.fps()))
Example #28
    def run(self):

        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        # Define data queues 
        if self.camera:
            q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False)
                q_lm_in = device.getInputQueue(name="lm_in")
        else:
            q_pd_in = device.getInputQueue(name="pd_in")
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
                q_lm_in = device.getInputQueue(name="lm_in")

        self.fps = FPS(mean_nb_frames=20)

        seq_num = 0
        nb_pd_inferences = 0
        nb_lm_inferences = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        while True:
            self.fps.update()
            if self.camera:
                in_video = q_video.get()
                video_frame = in_video.getCvFrame()
            else:
                if self.image_mode:
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                dx = (w - self.video_size) // 2
                dy = (h - self.video_size) // 2
                video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size]
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(seq_num)
                frame_nn.setWidth(self.pd_input_length)
                frame_nn.setHeight(self.pd_input_length)
                frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
                pd_rtrip_time = now()  # start timing before the send, as in the other examples
                q_pd_in.send(frame_nn)

                seq_num += 1

            annotated_frame = video_frame.copy()

            # Get palm detection
            inference = q_pd_out.get()
            if not self.camera: glob_pd_rtrip_time += now() - pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1

            # Hand landmarks
            if self.use_lm:
                for i,r in enumerate(self.regions):
                    img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
                    nn_data = dai.NNData()   
                    nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
                    q_lm_in.send(nn_data)
                    if i == 0: lm_rtrip_time = now() # We measure only for the first region
                
                # Retrieve hand landmarks
                for i,r in enumerate(self.regions):
                    inference = q_lm_out.get()
                    if i == 0: glob_lm_rtrip_time += now() - lm_rtrip_time
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
                    nb_lm_inferences += 1

                
            self.fps.display(annotated_frame, orig=(50,50),color=(240,180,100))
            cv2.imshow("video", annotated_frame)

            key = cv2.waitKey(1) 
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_handedness = not self.show_handedness
            elif key == ord('6'):
                self.show_scores = not self.show_scores
            elif key == ord('7'):
                self.show_gesture = not self.show_gesture

        # Print some stats
        if not self.camera:
            print(f"# video files frames                 : {seq_num}")
            print(f"# palm detection inferences received : {nb_pd_inferences}")
            print(f"# hand landmark inferences received  : {nb_lm_inferences}")
            print(f"Palm detection round trip            : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
            print(f"Hand landmark round trip             : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
Example #29
        video.set(cv2.CAP_PROP_FRAME_WIDTH, w)
        video.set(cv2.CAP_PROP_FRAME_HEIGHT, h)

    ok, frame = video.read()
    h, w, _ = frame.shape
    if args.output:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        out = cv2.VideoWriter(args.output, fourcc, 30, (w, h))

    my_op = OP(openpose_rendering=args.rendering,
               number_people_max=args.number_people_max,
               min_size=60,
               face_detection=args.face,
               frt=args.frt)

    fps = FPS()
    while True:
        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break
        fps.update()
        frame = frame.copy()
        nb_persons, body_kps, face_kps = my_op.eval(frame)
        my_op.draw_body(frame)
        if args.face:
            my_op.draw_face(frame)
            my_op.draw_eyes(frame)

        fps.display(frame)
Example #30
    def __init__(self,
                 use_face_tracking=True,
                 kbd_layout="QWERTY",
                 write_log_data=False,
                 media_directory="media",
                 child_cnx=None,
                 log_level=None):

        self.log_level = log_level
        self.debug = log_level is not None
        self.child_cnx = child_cnx
        self.use_multiprocessing = child_cnx is not None
        self.kbd_layout = kbd_layout
        # Flight data
        self.is_flying = False
        self.battery = None
        self.fly_mode = None
        self.throw_fly_timer = 0

        self.tracking_after_takeoff = False
        self.record = False
        self.keydown = False
        self.date_fmt = '%Y-%m-%d_%H%M%S'
        self.drone = tellopy.Tello(
            start_recv_thread=not self.use_multiprocessing)
        self.axis_command = {
            "yaw": self.drone.clockwise,
            "roll": self.drone.right,
            "pitch": self.drone.forward,
            "throttle": self.drone.up
        }
        self.axis_speed = {"yaw": 0, "roll": 0, "pitch": 0, "throttle": 0}
        self.cmd_axis_speed = {"yaw": 0, "roll": 0, "pitch": 0, "throttle": 0}
        self.prev_axis_speed = self.axis_speed.copy()
        self.def_speed = {"yaw": 50, "roll": 35, "pitch": 35, "throttle": 80}

        self.write_log_data = write_log_data
        self.reset()
        self.media_directory = media_directory
        if not os.path.isdir(self.media_directory):
            os.makedirs(self.media_directory)

        if self.write_log_data:
            path = 'tello-%s.csv' % datetime.datetime.now().strftime(
                self.date_fmt)
            self.log_file = open(path, 'w')
            self.write_header = True

        self.init_drone()
        if not self.use_multiprocessing:
            self.init_sounds()
            self.init_controls()

        # container for processing the packets into frames
        self.container = av.open(self.drone.get_video_stream())
        self.vid_stream = self.container.streams.video[0]
        self.out_file = None
        self.out_stream = None
        self.out_name = None
        self.start_time = time.time()

        # Setup Openpose
        if not self.use_multiprocessing:
            self.op = OP(number_people_max=1, min_size=25, debug=self.debug)
        self.use_openpose = False

        self.morse = CameraMorse(display=False)
        self.morse.define_command("---", self.delayed_takeoff)
        self.morse.define_command("...", self.throw_and_go, {'tracking': True})
        self.is_pressed = False

        self.fps = FPS()

        self.exposure = 0

        if self.debug:
            self.graph_pid = RollingGraph(window_name="PID",
                                          step_width=2,
                                          width=2000,
                                          height=500,
                                          y_max=200,
                                          colors=[(255, 255, 255),
                                                  (255, 200, 0), (0, 0, 255),
                                                  (0, 255, 0)],
                                          thickness=[2, 2, 2, 2],
                                          threshold=100,
                                          waitKey=False)

        # Logging
        if log_level is not None:
            if log_level == "info":
                log_level = logging.INFO
            elif log_level == "debug":
                log_level = logging.DEBUG
            log.setLevel(log_level)
            ch = logging.StreamHandler(sys.stdout)
            ch.setLevel(log_level)
            ch.setFormatter(
                logging.Formatter(
                    fmt=
                    '%(asctime)s.%(msecs)03d - %(name)s - %(levelname)s - %(message)s',
                    datefmt="%H:%M:%S"))
            log.addHandler(ch)
Example #31
def main_core(detector, embedder, recognizer, le, frame_queue, pframe_queue):

    print('[INFO] Starting:', mp.current_process().name)
    # time.sleep(1.0)

    # initialize the video stream, then allow the camera sensor to warm up

    ## Set up threading to start filming
    video_getter = VideoGet(frame_queue=frame_queue,
                            src=0,
                            name='Video Getter')
    time.sleep(1.0)
    # print('[INFO] Starting VideoGet...')
    video_getter.start()
    time.sleep(1.0)

    cpt = 0
    exitbool = False
    fps_count = FPS(2).start()
    while True:

        frame = video_getter.frame.copy()
        # Check whether enough time has passed to process this frame
        fps_count.istime()

        if fps_count.boolFrames:

            fps_count.updateactualtime()  # Update the comparison time

            face_data = acquire_frame(detector, embedder, frame, recognizer,
                                      le, 0.5, 0.65)  #,fa)
            pframe_queue.put(face_data)
            for item in face_data:
                # print(item[2:])
                frame = draw_frame(frame, item)
            fps_count.update()

            # cpt +=1

        exitbool = show_frame(frame)

        if exitbool or cpt == 80:
            fps_count.stop()
            print("[INFO] elapsed time fps processed: {:.2f}".format(
                fps_count.elapsed()))
            print("[INFO] approx. processed FPS: {:.2f}".format(
                fps_count.fps()))
            time.sleep(1)
            video_getter.stop()
            time.sleep(2)
            print('[INFO] Exiting :', mp.current_process().name)
            break
Example #32
print("[INFO] sampling THREADED frames from `picamera` module...")
vs = PiVideoStream().start()

#if args["display"] > 0:
#    disp = Display().start()

time.sleep(2.0)

prv = vs.read()
prv = cv2.cvtColor(prv,cv2.COLOR_BGR2GRAY)
prv = cv2.GaussianBlur(prv,(17,17),0)

center_y, center_x  = (item//2 for item in prv.shape)


fps = FPS().start()

# loop over some frames...this time using the threaded stream
while fps._numFrames < args["num_frames"]:
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    #frame = imutils.resize(frame, width=400)

    img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    result = preprocess(img, prv)

    # check to see if the frame should be displayed to our screen
    if args["display"] > 0:
        #disp.preview(result)
        cv2.imshow("Result", result)  # assumed completion: the original snippet is truncated here
        cv2.waitKey(1)

    fps.update()  # needed so the loop condition above can terminate