def __init__(self, videoPath='/dev/video0', videoW=1024, videoH=768, fontScale=1.0, verbose=True):
    self.verbose = verbose
    self._debug = False

    if self.verbose:
        logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
        logging.info('===============================================================')
        logging.info('Initializing Video Processor with the following parameters:')
        logging.info(' - OpenCV Version : {}'.format(cv2.__version__))
        logging.info(' - Device Path    : {}'.format(videoPath))
        logging.info(' - Frame Size     : {}x{}'.format(videoW, videoH))
        logging.info('===============================================================')

    # To send message to clients (Browser)
    self.imageStreamHandler = None
    self.threadExecutor = None

    # Video source
    self.videoData = Video_Data(self, videoPath)
    self.displayFrame = np.array([])
    self.frame_org = np.array([])

    # for Frame Rate measurement
    self.fps = FPS()
    playback_mode = self.videoData.get_playback_mode()
    self._playback_sync = (playback_mode == Video_Playback_Mode.Sync)
    self._fps_target = 30
    self._fps_wait = 1000.0 / 30
    self.currentFps = 30.0

    # For display
    self._fontScale = float(fontScale)
    self._annotate = False

    # Track states of this object
    self.set_video_processor_state(VideoProcessorState.Unknown)

    # OpenVINO
    self.inference_engine = None
    self.runInference = 0
    self.ioLoop = None
    self.current_model_data = None
def run(self):
    global RECTS_ARR  # contains current bounding rectangles
    global fpsProc    # fpsProc counter

    fpsProc = FPS().start()

    while not program_state.STOP_PROGRAM:
        if program_state.RUN_MODE:
            lane = self.lane
            fpsProc.update()  # update fpsProc counter

            IMG = CAPTURE.read()  # read frame from camera
            GRAY = cv2.cvtColor(IMG, cv2.COLOR_BGR2GRAY)  # turn image to grayscale
            _, THRESHOLD_IMG = cv2.threshold(GRAY, handle_config.WHITE_THRESH, 255, 0)  # threshold the gray image
            THRESH_LANE_IMG = THRESHOLD_IMG[
                handle_config.LANE_HEIGHT_START:handle_config.LANE_HEIGHT_END,
                handle_config.LANE_WIDTH_START[lane]:handle_config.LANE_WIDTH_END[lane]]

            # run OpenCV findContours, external boxes only
            _, CONTOURS, _ = cv2.findContours(THRESH_LANE_IMG, cv2.RETR_EXTERNAL,
                                              cv2.CHAIN_APPROX_SIMPLE)

            if CONTOURS:
                try:
                    contour = max(CONTOURS, key=cv2.contourArea)  # find the biggest area
                except Exception as e:
                    info_logger.lane_error(lane, CONTOURS, e)
                    continue  # skip this frame: 'contour' is undefined if max() failed

                rect = cv2.minAreaRect(contour)
                box = cv2.boxPoints(rect)[:]
                box = np.int64(box)
                start_pos = min([position[1] for position in box])

                # if area big enough
                # if contour within top bound
                if cv2.contourArea(contour) > handle_config.MIN_AREA and \
                        start_pos > handle_config.EDGE_GAP:
                    for position in box:
                        position[0] += handle_config.LANE_WIDTH_START[lane]
                        position[1] += handle_config.LANE_HEIGHT_START

                    RECTS_ARR[lane] = rect
                    BOX_ARR[lane] = box
def main():
    """
    Main starting point for the program
    :return: N/A
    """
    Graphics.init()
    FPS.init()
    drawer = Drawer()

    key_down_listeners = [drawer.on_key_down]
    mouse_down_listeners = [drawer.on_mouse_down]
    mouse_up_listeners = [drawer.on_mouse_up]

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    sys.exit()
                if event.key == pygame.K_F10:
                    Graphics.toggle_fullscreen()
                for delegate in key_down_listeners:
                    delegate(event.key, event.mod)
            if event.type == pygame.MOUSEBUTTONDOWN or event.type == pygame.MOUSEBUTTONUP:
                mouse_position = (event.pos[0] / Graphics.get_scale()[0],
                                  event.pos[1] / Graphics.get_scale()[1])
                if event.type == pygame.MOUSEBUTTONDOWN:
                    for delegate in mouse_down_listeners:
                        delegate(mouse_position, event.button)
                if event.type == pygame.MOUSEBUTTONUP:
                    for delegate in mouse_up_listeners:
                        delegate(mouse_position, event.button)

        drawer.update()
        drawer.draw()
        FPS.update()
        Graphics.flip()
def __init__(self):
    self.prev_flight_data = None
    self.record = False
    self.tracking = False
    self.keydown = False
    self.date_fmt = '%Y-%m-%d_%H%M%S'
    self.speed = 50
    self.prev_tracking = "tr_off"
    self.drone = tellopy.Tello()
    self.init_drone()
    self.init_controls()
    self.prev_tracking = 0  # note: overrides the "tr_off" value set above

    # container for processing the packets into frames
    self.container = av.open(self.drone.get_video_stream())
    self.vid_stream = self.container.streams.video[0]
    self.out_stream_writer = None
    self.out_file = None
    self.out_stream = None
    self.out_name = None
    self.start_time = time.time()

    self.use_openpose = False
    self.op = OP(number_people_max=5, min_size=10, debug=None)
    self.fps = FPS()
def update(self):
    self._last_update += FPS.delta_time()

    if self._last_update >= self._solve_delay \
            and not self._paused \
            and not self._search_space.path_found:
        try:
            if self._solve_delay == 0:
                # no delay: run the solver generator to completion in one update
                while True:
                    next(self.path_find_generator)
            else:
                # otherwise advance the solver by a single step per update
                next(self.path_find_generator)
        except StopIteration:
            pass

        self._nodes_surface_dirty = True
        self._font_surface_dirty = True
        self._debug_surface_dirty = True
        self._last_update = 0
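# Minimal sketch of the generator-stepping pattern update() above relies on: the solver is
# assumed to be written as a generator that yields once per expanded node, and the caller
# advances it with next() until StopIteration signals the search has finished. The BFS below
# is an illustration, not the project's actual search-space code.
from collections import deque

def bfs_steps(start, goal, neighbours):
    """Yield after each expanded node; return (StopIteration) when the goal is reached."""
    frontier, seen = deque([start]), {start}
    while frontier:
        node = frontier.popleft()
        if node == goal:
            return
        for n in neighbours(node):
            if n not in seen:
                seen.add(n)
                frontier.append(n)
        yield node  # one "solve step" per expanded node

# stepping it exactly like update() does:
# gen = bfs_steps((0, 0), (1, 1), lambda p: [(p[0] + 1, p[1]), (p[0], p[1] + 1)])
# next(gen)  # one expansion per call, until StopIteration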
def show_camera():
    # To flip the image, modify the flip_method parameter (0 and 2 are the most common)
    print(gstreamer_pipeline(flip_method=0))
    vs = WebcamVideoStream(src=gstreamer_pipeline()).start()
    # cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0), cv2.CAP_GSTREAMER)
    fps = FPS().start()
    take_snapshot = True

    while True:
        _ = cv2.namedWindow("CSI Camera", cv2.WINDOW_AUTOSIZE)  # Window
        while cv2.getWindowProperty("CSI Camera", 0) >= 0:
            original_img = vs.read()
            if take_snapshot:
                save_snapshot(original_img, "original")
                take_snapshot = False

            filter = (60, 87, 120, 255, 50, 255)
            img = apply_hsv_filter(original_img, filter)
            img = erode(img, 1)
            img = dilate(img, 1)
            targets = find_contours(img)

            brColor = (255, 255, 255)
            for contour in targets:
                rr = cv2.minAreaRect(contour)
                pt = get_goal_center(rr)
                cv2.circle(original_img, pt, 6, brColor, 3)

            cv2.imshow("CSI Camera", original_img)
            fps.update()  # count the displayed frame so fps.fps() is meaningful

            # This also acts as the loop delay; stop the program on the ESC key
            keyCode = cv2.waitKey(30) & 0xFF
            if keyCode == 27:
                break

        fps.stop()
        print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
        vs.stop()  # the cv2.VideoCapture path is commented out, so stop the threaded stream
        cv2.destroyAllWindows()
        break  # leave the outer loop once the preview window is closed
    else:
        print("Unable to open camera")
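# gstreamer_pipeline() is called above but not defined in this snippet. A commonly used
# builder for a Jetson CSI camera pipeline is sketched below; the parameter names and
# defaults are assumptions, not code taken from this project.
def gstreamer_pipeline(capture_width=1280, capture_height=720,
                       display_width=1280, display_height=720,
                       framerate=60, flip_method=0):
    return (
        "nvarguscamerasrc ! "
        "video/x-raw(memory:NVMM), width=(int)%d, height=(int)%d, "
        "format=(string)NV12, framerate=(fraction)%d/1 ! "
        "nvvidconv flip-method=%d ! "
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
        "videoconvert ! video/x-raw, format=(string)BGR ! appsink"
        % (capture_width, capture_height, framerate, flip_method,
           display_width, display_height)
    )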
args = vars(ap.parse_args())

# init camera and stream
camera = PiCamera()
camera.resolution = (640, 480)
# camera.framerate = 32
rawCapture = PiRGBArray(camera, size=(640, 480))
stream = camera.capture_continuous(rawCapture, format='bgr', use_video_port=True)
camera.close()

# switch to the threaded PiVideoStream and let the camera warm up
vs = PiVideoStream(resolution=(640, 480)).start()
time.sleep(2.0)
fps = FPS().start()

while fps._numFrames < args["num_frames"]:
    # grab frame from threaded video stream and resize it
    frame = vs.read()
    frame = imutils.resize(frame, width=640)

    # check to see if frame should be displayed
    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

    # update fps counter
    fps.update()

# stop timer and display FPS info
fps.stop()
vs.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
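# The benchmark scripts above assume an imutils-style FPS counter with start/update/stop,
# elapsed()/fps() and a _numFrames attribute. A minimal sketch of that interface, for
# illustration only (the snippets import their own FPS helper):
import datetime

class SimpleFPS:
    def __init__(self):
        self._start = None
        self._end = None
        self._numFrames = 0

    def start(self):
        self._start = datetime.datetime.now()
        return self

    def stop(self):
        self._end = datetime.datetime.now()

    def update(self):
        # call once per processed frame
        self._numFrames += 1

    def elapsed(self):
        # seconds between start() and stop()
        return (self._end - self._start).total_seconds()

    def fps(self):
        # average frames per second over the measured interval
        return self._numFrames / self.elapsed()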
cam_infoL.P = PL

cam_infoR = CameraInfo()
cam_infoR.height = CAMERA_HEIGHT
cam_infoR.width = CAMERA_WIDTH // 2  # each eye is half of the side-by-side frame
cam_infoR.distortion_model = "plumb_bob"
cam_infoR.K = KR
cam_infoR.D = DR
cam_infoR.R = RR
cam_infoR.P = PR

cap = cv2.VideoCapture(2)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
fps = FPS()

while True:
    _, frame = cap.read()
    if frame is not None:
        height, width, channels = frame.shape
        # split the side-by-side stereo frame into left and right halves
        frameL = frame[0:height, 0:width // 2]
        frameR = frame[0:height, width // 2:width]
        imgL_pub.publish(bridge.cv2_to_imgmsg(frameL, "bgr8"))
        imgR_pub.publish(bridge.cv2_to_imgmsg(frameR, "bgr8"))
        camL_pub.publish(cam_infoL)
        camR_pub.publish(cam_infoR)
def main():
    # initialize the video stream and allow the camera sensor to warm up
    print("[INFO] starting video stream...")
    # cap = cv2.VideoCapture(0)
    vs = WebcamVideoStream(src=0).start()
    fps = FPS().start()  # notes the start time
    width = 440

    with open("consumer_thread.csv", 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            "Thread Frame #",
            "Time spent in reading the frame (seconds) from queue",
            "Time spent performing inference on the frame (seconds)"
        ])

        # loop over the frames from the video stream
        # while True:
        while fps._numFrames < args["num_frames"]:
            # grab the frame from the threaded video stream and resize it
            # to have a maximum width of 400 pixels
            # Capture frame-by-frame
            start = timer()
            frame = vs.readFromQueue()
            end = timer()

            # if frame is not None then there was at least one frame in the queue
            # when read from the queue and returned; else the queue was empty
            if frame is not None:
                # update the FPS counter
                fps.update()
                consumerThreadFrameNumber = fps._numFrames
                consumerThreadTimeTakenToReadThisFrame = (end - start)
                print("[INFO] Consumer Thread : Time taken to read frame number",
                      consumerThreadFrameNumber, "from queue is",
                      consumerThreadTimeTakenToReadThisFrame, "seconds")

                height = frame.shape[0]
                dim = (width, height)
                frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)

                # detect faces in the frame and determine if they are wearing a
                # face mask or not
                startInferenceTime = timer()
                (locs, preds) = detect_and_predict_mask(frame, net, model)
                endInferenceTime = timer()
                consumerThreadTimeTakenToPerformInference = (endInferenceTime - startInferenceTime)
                print("[INFO] Consumer Thread : Time taken to perform inference on consumed frame number",
                      consumerThreadFrameNumber, "is",
                      consumerThreadTimeTakenToPerformInference, "seconds")

                writer.writerow([
                    consumerThreadFrameNumber,
                    consumerThreadTimeTakenToReadThisFrame,
                    consumerThreadTimeTakenToPerformInference
                ])

                for (box, pred) in zip(locs, preds):
                    # unpack the bounding box and predictions
                    (startX, startY, endX, endY) = box
                    (mask, withoutMask) = pred
                    label = "Mask" if mask > withoutMask else "No Mask"
                    color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

                    # include the probability in the label
                    # label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

                    # display the label and bounding box rectangle on the output frame
                    cv2.putText(frame, label, (startX, startY - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
                    cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

                print("Showing frame")
                # show the output frame
                cv2.imshow("Output", frame)
                # cv2.destroyAllWindows()
                # key = cv2.waitKey(10) & 0xFF
                key = cv2.waitKey(1) & 0xFF

                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break

    fps.stop()
    vs.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    cv2.destroyAllWindows()
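# readFromQueue() is not part of the stock imutils WebcamVideoStream. A minimal sketch
# (an assumption, not this project's actual class) of the queue-backed producer/consumer
# reader the consumer loop above presumably pairs with:
import queue
import threading
import cv2

class QueuedVideoStream:
    def __init__(self, src=0, maxsize=128):
        self.cap = cv2.VideoCapture(src)
        self.q = queue.Queue(maxsize=maxsize)
        self.stopped = False

    def start(self):
        # producer thread fills the queue in the background
        threading.Thread(target=self._reader, daemon=True).start()
        return self

    def _reader(self):
        while not self.stopped:
            grabbed, frame = self.cap.read()
            if not grabbed:
                break
            if not self.q.full():
                self.q.put(frame)

    def readFromQueue(self):
        # consumer side: return the next frame, or None if the queue is empty
        try:
            return self.q.get_nowait()
        except queue.Empty:
            return None

    def stop(self):
        self.stopped = True
        self.cap.release()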
def run(self):
    global THRESHOLD_IMG                        # thresholded image
    global RECTS_ARR                            # contains current bounding rectangles
    global PASS_COUNTS, FAIL_COUNTS             # total counts
    global AVG_WIDTHS_TOTAL, AVG_HEIGHTS_TOTAL  # average width/height
    global fpsUI                                # fps counter

    fpsUI = FPS().start()
    update_display = False

    while not program_state.STOP_PROGRAM:
        fpsUI.update()

        CROPPED = CAPTURE.read()  # [handle_config.FRAME_HEIGHT_START:handle_config.FRAME_HEIGHT_END, handle_config.FRAME_WIDTH_START:handle_config.FRAME_WIDTH_END]
        GRAY = cv2.cvtColor(CROPPED, cv2.COLOR_BGR2GRAY)  # turn image to grayscale
        _, THRESHOLD_IMG = cv2.threshold(GRAY, handle_config.WHITE_THRESH, 255, 0)  # run threshold on gray image
        DISPLAY_IMG = CROPPED

        if program_state.THRESH_MODE:
            DISPLAY_IMG = THRESHOLD_IMG
        else:
            for lane in range(1, 2):  # handle_config.LANE_COUNT):
                update_display = False

                if len(RECTS_ARR[lane]) > 0 and len(BOX_ARR[lane]) > 0:
                    color = handle_config.RED
                    current_rect = RECTS_ARR[lane]
                    current_box = BOX_ARR[lane]

                    if current_rect[1][0] > current_rect[1][1]:
                        w = current_rect[1][0]
                        h = current_rect[1][1]
                    else:
                        w = current_rect[1][1]
                        h = current_rect[1][0]

                    calc_dimensions = variables.dimension_calc(lane, w, h)
                    if variables.is_pass(lane, w, h):
                        color = handle_config.GREEN

                    start_pos = min([position[0] for position in current_box])
                    high_pos = min([position[1] for position in current_box])
                    highest_pos = max([position[1] for position in current_box])
                    low_pos = max([position[1] for position in current_box]) + 15

                    exiting_box = handle_config.LANE_HEIGHT_START + \
                        handle_config.LANE_HEIGHT - \
                        handle_config.EDGE_GAP

                    if highest_pos > exiting_box:
                        cv2.drawContours(CROPPED, [current_box], 0, handle_config.ORANGE, 2)
                    else:
                        cv2.drawContours(CROPPED, [current_box], 0, color, 2)
                        cv2.putText(CROPPED, calc_dimensions, (start_pos, high_pos),
                                    handle_config.FONT, 1, color, 2)
                        update_display = True

                    if program_state.CALIBRATE_MODE:
                        pixel_dimensions = '{0}px x {1}px'.format(int(w), int(h))
                        cv2.putText(CROPPED, pixel_dimensions, (start_pos, low_pos),
                                    handle_config.FONT, 1, handle_config.RED, 2)

            if program_state.REQUEST_CALIBRATE:
                for lane in range(handle_config.LANE_COUNT):  # loop through lanes
                    if RECTS_ARR[lane]:  # if lane has contour
                        current_rect = RECTS_ARR[lane]

                        if current_rect[1][0] > current_rect[1][1]:
                            w = current_rect[1][0]
                            h = current_rect[1][1]
                        else:
                            w = current_rect[1][1]
                            h = current_rect[1][0]

                        handle_config.PIXEL_WIDTHS[lane] = w
                        handle_config.PIXEL_HEIGHTS[lane] = h

                # Adjust ratios to match calibration
                for index, width in enumerate(handle_config.PIXEL_WIDTHS):
                    handle_config.WIDTH_RATIOS[index] = handle_config.ACTUAL_WIDTH / width
                for index, height in enumerate(handle_config.PIXEL_HEIGHTS):
                    handle_config.HEIGHT_RATIOS[index] = handle_config.ACTUAL_HEIGHT / height

                # Update low/high limits for calibration
                for index, ratio in enumerate(handle_config.WIDTH_RATIOS):
                    handle_config.LANE_FAIL_WIDTHS_LOW[index] = handle_config.FAIL_WIDTH_LOW / ratio
                for index, ratio in enumerate(handle_config.WIDTH_RATIOS):
                    handle_config.LANE_FAIL_WIDTHS_HIGH[index] = handle_config.FAIL_WIDTH_HIGH / ratio
                for index, ratio in enumerate(handle_config.HEIGHT_RATIOS):
                    handle_config.LANE_FAIL_HEIGHTS_LOW[index] = handle_config.FAIL_HEIGHT_LOW / ratio
                for index, ratio in enumerate(handle_config.HEIGHT_RATIOS):
                    handle_config.LANE_FAIL_HEIGHTS_HIGH[index] = handle_config.FAIL_HEIGHT_HIGH / ratio

                handle_config.setValue('CALIBRATION', 'PIXEL_WIDTHS', handle_config.PIXEL_WIDTHS)
                handle_config.setValue('CALIBRATION', 'PIXEL_HEIGHTS', handle_config.PIXEL_HEIGHTS)
                program_state.request_calibration(False)

            # Not Thresh Mode
            not_calib_statement = (lane for lane in range(handle_config.LANE_COUNT)
                                   if not program_state.CALIBRATE_MODE)
            for lane in not_calib_statement:
                AVG_TEXT = '% PASSED: 0'
                AVG_WIDTHS_TEXT = 'AVG LENGTH: ' + str(
                    int(AVG_WIDTHS_TOTAL[lane][0] * handle_config.WIDTH_RATIOS[lane])) + 'mm'
                AVG_HEIGHTS_TEXT = 'AVG THICKNESS: ' + str(
                    int(AVG_HEIGHTS_TOTAL[lane][0] * handle_config.HEIGHT_RATIOS[lane])) + 'mm'

                if PASS_COUNTS[lane] > 0:
                    AVG_TEXT = '% PASSED: ' + str(
                        100 * PASS_COUNTS[lane] / (PASS_COUNTS[lane] + FAIL_COUNTS[lane]))

                cv2.putText(CROPPED, 'LANE ' + str(lane + 1),
                            (handle_config.LANE_WIDTH_START[lane], handle_config.TEXT_Y),
                            handle_config.FONT, 1, handle_config.RED, 2)
                # PASS / FAIL counters; the 'TEXT_Y + 30' offset on the PASS line is assumed
                # to follow the 30 px spacing of the labels below
                cv2.putText(CROPPED, 'PASS: ' + str(PASS_COUNTS[lane]),
                            (handle_config.LANE_WIDTH_START[lane], handle_config.TEXT_Y + 30),
                            handle_config.FONT, 1, handle_config.RED, 2)
                cv2.putText(CROPPED, 'FAIL: ' + str(FAIL_COUNTS[lane]),
                            (handle_config.LANE_WIDTH_START[lane], handle_config.TEXT_Y + 60),
                            handle_config.FONT, 1, handle_config.RED, 2)
                cv2.putText(CROPPED, AVG_TEXT,
                            (handle_config.LANE_WIDTH_START[lane], handle_config.TEXT_Y + 90),
                            handle_config.FONT, 1, handle_config.RED, 2)

                if AVG_WIDTHS_TOTAL[lane][0] > 0:
                    cv2.putText(CROPPED, AVG_WIDTHS_TEXT,
                                (handle_config.LANE_WIDTH_START[lane], handle_config.TEXT_Y + 120),
                                handle_config.FONT, 1, handle_config.RED, 2)
                if AVG_HEIGHTS_TOTAL[lane][0] > 0:
                    cv2.putText(CROPPED, AVG_HEIGHTS_TEXT,
                                (handle_config.LANE_WIDTH_START[lane], handle_config.TEXT_Y + 150),
                                handle_config.FONT, 1, handle_config.RED, 2)

            # Show Lane Boundaries
            cv2.rectangle(CROPPED, (handle_config.LANE_X1, handle_config.LANE_Y1),
                          (handle_config.LANE_X2, handle_config.LANE_Y2), handle_config.YELLOW, 2)
            cv2.rectangle(CROPPED, (handle_config.SPLIT_X1, handle_config.LANE_Y1),
                          (handle_config.SPLIT_X2, handle_config.LANE_Y2), handle_config.YELLOW, 2)
            cv2.rectangle(CROPPED, (handle_config.SPLIT_X3, handle_config.LANE_Y1),
                          (handle_config.SPLIT_X4, handle_config.LANE_Y2), handle_config.YELLOW, 2)

            # Lane Traffic Calculations
            red_fail = '111111'
            yellow_fail = '111'

            # Lane 1 Traffic Light
            lane_1_traffic_colour = handle_config.GREEN
            lane_1_history = ''.join(str(e) for e in HISTORICAL_FAILS[0])
            if red_fail in lane_1_history:
                lane_1_traffic_colour = handle_config.RED
            elif yellow_fail in lane_1_history:
                lane_1_traffic_colour = handle_config.YELLOW
            cv2.rectangle(CROPPED, (handle_config.TRAFFIC_LANE_1_X1, handle_config.TRAFFIC_Y1),
                          (handle_config.TRAFFIC_LANE_1_X2, handle_config.TRAFFIC_Y2),
                          lane_1_traffic_colour, -1)

            # Lane 2 Traffic Light
            lane_2_traffic_colour = handle_config.GREEN
            lane_2_history = ''.join(str(e) for e in HISTORICAL_FAILS[1])
            if red_fail in lane_2_history:
                lane_2_traffic_colour = handle_config.RED
            elif yellow_fail in lane_2_history:
                lane_2_traffic_colour = handle_config.YELLOW
            cv2.rectangle(CROPPED, (handle_config.TRAFFIC_LANE_2_X1, handle_config.TRAFFIC_Y1),
                          (handle_config.TRAFFIC_LANE_2_X2, handle_config.TRAFFIC_Y2),
                          lane_2_traffic_colour, -1)

            # Lane 3 Traffic Light
            lane_3_traffic_colour = handle_config.GREEN
            lane_3_history = ''.join(str(e) for e in HISTORICAL_FAILS[2])
            if red_fail in lane_3_history:
                lane_3_traffic_colour = handle_config.RED
            elif yellow_fail in lane_3_history:
                lane_3_traffic_colour = handle_config.YELLOW
            cv2.rectangle(CROPPED, (handle_config.TRAFFIC_LANE_3_X1, handle_config.TRAFFIC_Y1),
                          (handle_config.TRAFFIC_LANE_3_X2, handle_config.TRAFFIC_Y2),
                          lane_3_traffic_colour, -1)

            # Lane 4 Traffic Light
            lane_4_traffic_colour = handle_config.GREEN
            lane_4_history = ''.join(str(e) for e in HISTORICAL_FAILS[3])
            if red_fail in lane_4_history:
                lane_4_traffic_colour = handle_config.RED
            elif yellow_fail in lane_4_history:
                lane_4_traffic_colour = handle_config.YELLOW
            cv2.rectangle(CROPPED, (handle_config.TRAFFIC_LANE_4_X1, handle_config.TRAFFIC_Y1),
                          (handle_config.TRAFFIC_LANE_4_X2, handle_config.TRAFFIC_Y2),
                          lane_4_traffic_colour, -1)

            # Show Low Cost Automation Banner
            cv2.putText(CROPPED, 'EasiBake Roll Checker', (760, 100),
                        handle_config.FONT, 3, handle_config.RED, 4)
            cv2.putText(CROPPED, 'Low Cost Automation Ltd', (900, 160),
                        handle_config.FONT, 2, handle_config.RED, 3)

            # Show current AIO
            cv2.putText(CROPPED, 'OUTPUT: ' + str(OUTPUT), (950, 890),
                        handle_config.FONT, 1, handle_config.RED, 2)

            # Show min/max values
            max_length = 'MAX LENGTH = ' + str(int(handle_config.FAIL_WIDTH_HIGH)) + 'mm '
            min_length = 'MIN LENGTH = ' + str(int(handle_config.FAIL_WIDTH_LOW)) + 'mm '
            max_thickness = 'MAX THICKNESS = ' + str(int(handle_config.FAIL_HEIGHT_HIGH)) + 'mm'
            min_thickness = 'MIN THICKNESS = ' + str(int(handle_config.FAIL_HEIGHT_LOW)) + 'mm'
            cv2.putText(CROPPED, 'CURRENT REJECT SETTINGS', (240, 1800),
                        handle_config.FONT, 1, handle_config.RED, 2)
            cv2.line(CROPPED, (50, 1815), (875, 1815), handle_config.RED, 2)
            cv2.putText(CROPPED, max_length + max_thickness, (50, 1850),
                        handle_config.FONT, 1, handle_config.RED, 2)
            cv2.putText(CROPPED, min_length + min_thickness, (50, 1900),
                        handle_config.FONT, 1, handle_config.RED, 2)

        window_name = 'LINE VIEW'
        if DISPLAY_IMG.size > 0 and update_display:
            cv2.namedWindow(window_name, cv2.WND_PROP_FULLSCREEN)
            cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            DISPLAY_IMG = cv2.resize(DISPLAY_IMG, (1600, 900), interpolation=cv2.INTER_AREA)
            cv2.imshow(window_name, DISPLAY_IMG)

        # Required for the display loop; the key itself is not needed
        _ = cv2.waitKey(1) & 0xFF
class CamThreadController:
    """Separate thread class responsible for recording the files."""

    def __init__(self, client, cam, name, gui):
        """Constructor

        Arguments:
            videoDirectory {str} -- Path to the directory containing the videos and the csv files of the frames.
            gui {GuiPart} -- GUI object to configure the graphical user interface of the app.
        """
        self.client = client
        self.cam = cam
        self.name = name
        self.duration = 10
        self.gui = gui
        self.video_handler = cv2.VideoWriter()
        self.record_thread = None
        self.stop_record_thread = threading.Event()
        self.fourcc_codec = cv2.VideoWriter_fourcc(*'MJPG')  # XVID
        self.f = None
        self.fps = FPS()

    def getNewFiles(self, videoDirectory, id, name):
        """Generates the new file names; called each time the recording rolls over.

        Arguments:
            videoDirectory {str} -- Path to the directory containing the csv files of the GPS track log.
            id {str} -- new id for each file.

        Returns:
            [str] -- frames file name.
            [str] -- video file name.
        """
        frames_filename = videoDirectory + "track_frames_" + self.name + "_" + id + "_" + \
            time.strftime("%d-%m-%Y-%H-%M-%S") + ".csv"
        self.frames_recorder = open(frames_filename, 'a')
        self.frames_recorder.write("frames_id,frames_timestamp \n")
        self.frames_recorder.close()

        video_filename = videoDirectory + "track_video_" + self.name + "_" + id + "_" + \
            time.strftime("%d-%m-%Y-%H-%M-%S") + ".avi"
        self.video_handler.open(video_filename, self.fourcc_codec, 28,
                                (self.client.camera.width, self.client.camera.height))
        return frames_filename, video_filename

    def openfiles(self, frames_filename):
        """Opens the frames csv file for appending.

        Arguments:
            frames_filename {str} -- File name of the frames csv file.
        """
        self.frames_recorder = open(frames_filename, 'a')

    def write(self, videoHandler, frame, framesHandler, frame_id):
        """Writes the data into the corresponding files.

        Arguments:
            videoHandler {cv2.VideoWriter} -- Video file handler used to write the video.
            framesHandler {file} -- Frames file handler used to write the csv file.
            frame_id {int} -- The id of each frame from the camera.
        """
        framesHandler.write(str(frame_id) + "," + str(datetime.datetime.utcnow()) + "\n")
        videoHandler.write(frame)
        framesHandler.close()

    def record(self, properties, path):
        """Main recording loop run by the thread."""
        self.fps.start()
        start = time.time()
        id = self.getId(properties)
        frame_id = 0
        frames_filename, video_filename = self.getNewFiles(path, id, self.cam)

        while not self.stop_record_thread.is_set():
            ret, frame = self.cam.read()
            if np.array_equal(self.f, frame):
                continue
            self.fps.update()
            self.f = frame

            currentTime = time.time()
            if int(currentTime - start) >= self.duration:
                # Roll over to a new pair of files once the duration has elapsed.
                self.fps.stop()
                print("[INFO] elapsed time: {:.2f}".format(self.fps.elapsed()))
                print("[INFO] approx. FPS: {:.2f}".format(self.fps.fps()))
                id = self.getId(properties)
                frames_filename, video_filename = self.getNewFiles(path, id, self.name)
                frame_id = 0
                start = time.time()
                self.fps.start()

            self.openfiles(frames_filename)
            self.write(self.video_handler, frame, self.frames_recorder, frame_id)
            frame_id = frame_id + 1

        self.video_handler.release()

    def start(self, properties, path):
        """Starts the recording thread."""
        self.stop_record_thread.clear()
        self.record_thread = threading.Thread(target=self.record, args=(properties, path, ))
        self.record_thread.setDaemon(True)
        self.record_thread.start()

    def stop(self):
        """Stops the recording thread."""
        self.stop_record_thread.set()
        self.record_thread.join(4)
        self.record_thread = None
        self.fps.stop()

    def isAlive(self):
        """Checks whether the recording thread is still alive.

        Returns:
            [bool] -- Thread running: True, Thread stopped: False
        """
        return self.record_thread.isAlive()

    def getId(self, properties):
        """Gets a new ID each time a file is created.

        Returns:
            [integer] -- ID
        """
        # Read the last available id
        p = Properties()
        with open(properties, 'rb') as f:
            p.load(f, 'utf-8')
        id = str(int(p["video_id"].data))

        # Increment the id in the file
        p["video_id"] = str(int(p["video_id"].data) + 1)
        with open(properties, 'wb') as f:
            p.store(f, encoding="utf-8")

        # Return the available id
        return id
class ThreadedClient: """ Launch the main part of the GUI and the differents threads of the Tool. One class responsible of the different control threads """ def __init__(self, master): """ Start the GUI and the asynchronous threads. We are in the main (original) thread of the application, which will later be used by the GUI as well. """ self.master = master self.gps_destination_file_name = "" self.frames_destination_timestamp = "" self.video_destination_file_handler = cv2.VideoWriter() self.gps_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/GPS/" self.video_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/Videos/" self.record = False self.img = None self.frame = None self.frame1 = None self.f = 0 self.check = False self.brahim = 1.0 / 25 # p = Properties() # with open('config.properties', 'rb') as f: # p.load(f, 'utf-8') # print(p["video_id"].data) #### incremtentaiton properties file # p["video_id"]=str(int(p["video_id"].data) +1) # print(p["video_id"].data) # with open('config.properties', 'wb') as f: # p.store(f, encoding="utf-8") self.frame = None # Create the gps queue self.gps_queue = Queue() # principal thread self.running = 1 self.cameras_state = self.find_cam() print(self.cameras_state) if self.cameras_state[0]: self.camera = See3Cam(src=self.cameras_state[1], width=1280, height=720, framerate=30, name="CAM") self.camera.start() if self.cameras_state[2]: self.camera1 = See3Cam(src=self.cameras_state[3], width=1280, height=720, framerate=30, name="CAM1") self.camera1.start() self.fps = FPS() # Set up Gps self.gps = GpsCapture() # Set up the GUI part self.gui = GuiPart(master, self, self.cameras_state, self.gps.running, self.recordData, self.stopRecord) # Set up the thread for the GPS checking gps_controller = threading.Thread(target=self.checkGpsConnection, args=(2, )) gps_controller.setDaemon(True) gps_controller.start() # Set up the thread for the camera checking camera_controller = threading.Thread(target=self.checkCameraConnection, args=(2, )) camera_controller.setDaemon(True) camera_controller.start() # # Set up the thread for the video stream # camera_controller = threading.Thread(target=self.readCameraThread, args=(self.brahim,)) # camera_controller.setDaemon(True) # camera_controller.start() # Set up the thread for the GPS stream camera_controller = threading.Thread( target=self.readGpsThread, args=(self.brahim, )) ## refactoring camera_controller.setDaemon(True) camera_controller.start() # Set up the Thread for the data recording self.recordingThread = RecordingThreadController( self, self.video_destination_dir, self.gps_destination_dir, self.gui) self.video_output = True # Start the periodic call in the GUI . self.fps.start() self.periodicCall() def periodicCall(self): """ Check every 1 ms the connection to the GPS system and camera and send them to GUI part. """ if self.gps.running or self.frame is not None or self.frame1 is not None: self.check = True # Update the GUI of the camera and GPS status self.gui.processIncoming(self.cameras_state, self.gps.running, self.video_output, self.frame, self.frame1) if not self.running: # This is the brutal stop of the system. sys.exit(1) # # Repeat this function every 1 ms # self.f = self.f+1 # print(self.f) self.master.after(1, self.periodicCall) def checkGpsConnection(self, interval=1): """ Thread to Check the port connection and the status of the GPS System every second. 
""" while True: # Get the GPS port connection verify_gps_connection, gps_port = self.gps.get_port() if not verify_gps_connection: self.gps.running = False self.gps.isConnected = False elif not self.gps.running: self.gps.running = False self.gps.open_gps(gps_port, self.gps.baudrate) else: self.gps.running = True self.gps.isConnected = True time.sleep(interval) def checkCameraConnection(self, interval=1): """ Thread to Check the port connection and the status of the Camera every second. """ while True: self.cameras_state = self.find_cam() if not self.cameras_state[0]: time.sleep(1) continue if not self.cameras_state[2]: time.sleep(1) continue time.sleep(interval) def readCameraThread(self, interval=0.05): while True: if self.cameras_state[0]: try: self.grabbed, self.frame = self.camera.read() except AttributeError: time.sleep(2) if self.cameras_state[2]: try: self.grabbed1, self.frame1 = self.camera1.read() except AttributeError: time.sleep(2) # try: # self.frame = self.detect(self.frame2, net.eval(), transform) # except RuntimeError: # print("Runtime error") # if self.recordingThread.record_thread is not None and self.recordingThread.isAlive(): # print("putting in cam queue") # self.cam_queue.put(self.frame) time.sleep(interval) self.fps.update() def readGpsThread(self, interval=1): while True: if self.gps.running: self.gps.read() time.sleep(interval) # if self.recordingThread.record_thread is not None and self.recordingThread.isAlive(): # print("Putting in gps queue") # self.gps_queue.put(self.gps) def recordData(self): """ This function listens to the record button and starts the recording thread accordingly """ if self.check: if not self.record: self.video_output = False self.recordingThread.start() self.record = True self.gui.btn_record.configure(text="Recording", bg="red") self.gui.progress_bar.start(int( 10000 / 100)) # duration of videos divided by 100 else: print("Alreadiy recording") else: print("Cannot record, There is no device connected !") def stopRecord(self): """ This function listens to the record button and starts the recording thread accordingly """ if self.record: self.video_output = True self.recordingThread.stop() self.record = False self.gui.btn_record.configure(text="Record Data", bg="green") self.gui.progress_bar.stop() else: print("There is no recording") def find_cam(self): camera_indexes = [] cmd = ["/usr/bin/v4l2-ctl", "--list-devices"] out, err = subprocess.Popen(cmd, stdout=PIPE, stderr=PIPE).communicate() out, err = out.strip(), err.strip() for l in [i.split(b'\n\t') for i in out.split(b'\n\n')]: if "See3CAM_CU20" in l[0].decode(encoding="UTF-8"): camera_indexes.append(int(l[1].decode(encoding="UTF-8")[-1])) if camera_indexes.__len__() == 2: return (True, camera_indexes[0], True, camera_indexes[1]) elif camera_indexes.__len__() == 1: return (True, camera_indexes[0], False, None) else: return (False, None, False, None) def detect( self, frame, net, transform ): # We define a detect function that will take as inputs, a frame, a ssd neural network, and a transformation to be applied on the images, and that will return the frame with the detector rectangle. height, width = frame.shape[: 2] # We get the height and the width of the frame. frame_t = transform(frame)[ 0] # We apply the transformation to our frame. x = torch.from_numpy(frame_t).permute( 2, 0, 1) # We convert the frame into a torch tensor. x = Variable(x.unsqueeze( 0)) # We add a fake dimension corresponding to the batch. 
x = x.cuda() y = net( x ) # We feed the neural network ssd with the image and we get the output y. detections = y.data # We create the detections tensor contained in the output y. scale = torch.Tensor( [width, height, width, height] ) # We create a tensor object of dimensions [width, height, width, height]. for i in range(detections.size(1)): # For every class: if (labelmap[i - 1] == "car") or (labelmap[i - 1] == "person") or ( labelmap[i - 1] == "motorbike") or (labelmap[i - 1] == "bicycle") or (labelmap[i - 1] == "bus"): j = 0 # We initialize the loop variable j that will correspond to the occurrences of the class. while detections[ 0, i, j, 0] >= 0.6: # We take into account all the occurrences j of the class i that have a matching score larger than 0.6. # We get the coordinates of the points at the upper left and the lower right of the detector rectangle. pt = (detections[0, i, j, 1:] * scale).cpu().numpy() topLeft = ( int(pt[0]), int(pt[1]) ) # We get the top Left corner of the ogject detected bottomRight = ( int(pt[2]), int(pt[3]) ) # We get the bottom Right corner of the object detected x, y = topLeft[0], topLeft[ 1] # We get the coordinates of the top Left Corner x = max(x, 0) y = max(y, 0) w, h = bottomRight[0] - topLeft[0], bottomRight[1] - topLeft[ 1] # We get the width and the height of the object detected #print("x = ", x, " y = ", y, "w = ", w , " h = " ,h) cv2.rectangle( frame, topLeft, bottomRight, (0, 0, 0), cv2.FILLED ) # We draw a rectangle around the detected object. cv2.putText( frame, labelmap[i - 1], topLeft, cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA ) # We put the label of the class right above the rectangle. j += 1 # We increment j to get to the next occurrence. return frame # We return the original frame with the detector rectangle and the label around the detected object.
class zed_camera(object):
    """docstring for zed_camera."""

    def __init__(self, arg):
        super(zed_camera, self).__init__()
        # cap = VideoStream(src=0, resolution=(2560,720)).start()
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        self.fps = FPS()
        self.imgUL = None
        self.imgUR = None

        rospy.init_node("stereo_streamer")
        self.imgL_pub = rospy.Publisher("imgL", Image, queue_size=10)
        self.imgR_pub = rospy.Publisher("imgR", Image, queue_size=10)

        self.left_matcher = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=160,  # max_disp has to be dividable by 16, e.g. 160, 192, 256
            blockSize=5,
            disp12MaxDiff=1,
            uniquenessRatio=15,
            speckleWindowSize=0,
            speckleRange=2,
            preFilterCap=63,
            mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
        self.right_matcher = cv2.ximgproc.createRightMatcher(self.left_matcher)
        self.wls_filter = cv2.ximgproc.createDisparityWLSFilter(matcher_left=self.left_matcher)
        self.wls_filter.setLambda(lmbda)
        self.wls_filter.setSigmaColor(sigma)

        # rectification maps for each half of the 2560x720 side-by-side frame
        self.mapLx, self.mapLy = cv2.initUndistortRectifyMap(
            camera_matrix_L, dist_coeff_L, RL, PL, (2560 // 2, 720), cv2.CV_32FC1)
        self.mapRx, self.mapRy = cv2.initUndistortRectifyMap(
            camera_matrix_R, dist_coeff_R, RR, PR, (2560 // 2, 720), cv2.CV_32FC1)

    def read(self):
        _, frame = self.cap.read()
        height, width, channels = frame.shape
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        frameL = frame[0:height, 0:width // 2]
        self.imgUL = cv2.resize(cv2.remap(frameL, self.mapLx, self.mapLy, cv2.INTER_LINEAR),
                                (0, 0), fx=0.5, fy=0.5)
        frameR = frame[0:height, width // 2:width]
        self.imgUR = cv2.resize(cv2.remap(frameR, self.mapRx, self.mapRy, cv2.INTER_LINEAR),
                                (0, 0), fx=0.5, fy=0.5)

        displ = self.left_matcher.compute(self.imgUL, self.imgUR)  # .astype(np.float32)/16
        dispr = self.right_matcher.compute(self.imgUR, self.imgUL)
        displ = np.int16(displ)
        dispr = np.int16(dispr)
        filteredImg = self.wls_filter.filter(displ, self.imgUL, None, dispr)
        filteredImg = cv2.normalize(src=filteredImg, dst=filteredImg, beta=0, alpha=255,
                                    norm_type=cv2.NORM_MINMAX)
        filteredImg = np.uint8(filteredImg)

        self.fps.update()

    def streem(self):
        # the rectified images are single-channel after the grayscale conversion in read()
        self.imgL_pub.publish(bridge.cv2_to_imgmsg(self.imgUL, "mono8"))
        self.imgR_pub.publish(bridge.cv2_to_imgmsg(self.imgUR, "mono8"))
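# Hypothetical usage of the zed_camera class above (the loop rate and the unused
# constructor argument are assumptions, not taken from the source):
if __name__ == "__main__":
    cam = zed_camera(None)
    rate = rospy.Rate(15)          # publish at roughly 15 Hz
    while not rospy.is_shutdown():
        cam.read()                 # grab, rectify and disparity-filter the stereo pair
        cam.streem()               # publish the rectified left/right images
        rate.sleep()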
out = cv2.VideoWriter('output.avi', -1, 20.0, (640, 480))  # needed below by out.write()
font = cv2.FONT_HERSHEY_SIMPLEX

# this is used to test the relative fps of the threaded and unthreaded options
# grab a pointer to the video stream and initialize the FPS counter
print("[INFO] sampling frames without Threading...")
stream = cv2.VideoCapture('test.avi')
# proc = Process(target=frame_display, args=(taskqueue, outqueue))
# proc.start()
fps = FPS()
fps.start()

# loop over some frames
while fps._numFrames < 100:
    # grab the frame from the stream and resize it to have a maximum
    # width of 400 pixels
    (grabbed, frame) = stream.read()
    if grabbed:
        frame = cv2.resize(frame, (640, 400))
        out.write(frame)
        # cv2.imshow("Frame", frame)
        # key = cv2.waitKey(1) & 0xFF
    fps.update()
class BlazeposeOpenvino: def __init__(self, input_src=None, pd_xml=POSE_DETECTION_MODEL, pd_device="CPU", pd_score_thresh=0.5, pd_nms_thresh=0.3, lm_xml=FULL_BODY_LANDMARK_MODEL, lm_device="CPU", lm_score_threshold=0.7, full_body=True, use_gesture=False, smoothing=True, filter_window_size=5, filter_velocity_scale=10, show_3d=False, crop=False, multi_detection=False, force_detection=False, output=None): self.pd_score_thresh = pd_score_thresh self.pd_nms_thresh = pd_nms_thresh self.lm_score_threshold = lm_score_threshold self.full_body = full_body self.use_gesture = use_gesture self.smoothing = smoothing self.show_3d = show_3d self.crop = crop self.multi_detection = multi_detection self.force_detection = force_detection if self.multi_detection: print( "Warning: with multi-detection, smoothing filter is disabled and pose detection is forced on every frame." ) self.smoothing = False self.force_detection = True if input_src.endswith('.jpg') or input_src.endswith('.png'): self.input_type = "image" self.img = cv2.imread(input_src) self.video_fps = 25 video_height, video_width = self.img.shape[:2] else: self.input_type = "video" if input_src.isdigit(): input_type = "webcam" input_src = int(input_src) self.cap = cv2.VideoCapture(input_src) self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS)) video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print("Video FPS:", self.video_fps) # The full body landmark model predict 39 landmarks. # We are interested in the first 35 landmarks # from 1 to 33 correspond to the well documented body parts, # 34th (mid hips) and 35th (a point above the head) are used to predict ROI of next frame # Same for upper body model but with 8 less landmarks self.nb_lms = 35 if self.full_body else 27 if self.smoothing: self.filter = mpu.LandmarksSmoothingFilter(filter_window_size, filter_velocity_scale, (self.nb_lms - 2, 3)) # Create SSD anchors # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt anchor_options = mpu.SSDAnchorOptions( num_layers=4, min_scale=0.1484375, max_scale=0.75, input_size_height=128, input_size_width=128, anchor_offset_x=0.5, anchor_offset_y=0.5, strides=[8, 16, 16, 16], aspect_ratios=[1.0], reduce_boxes_in_lowest_layer=False, interpolated_scale_aspect_ratio=1.0, fixed_anchor_size=True) self.anchors = mpu.generate_anchors(anchor_options) self.nb_anchors = self.anchors.shape[0] print(f"{self.nb_anchors} anchors have been created") # Load Openvino models self.load_models(pd_xml, pd_device, lm_xml, lm_device) # Rendering flags self.show_pd_box = False self.show_pd_kps = False self.show_rot_rect = False self.show_landmarks = True self.show_scores = False self.show_gesture = self.use_gesture self.show_fps = True self.show_segmentation = False if self.show_3d: self.vis3d = o3d.visualization.Visualizer() self.vis3d.create_window() opt = self.vis3d.get_render_option() opt.background_color = np.asarray([0, 0, 0]) z = min(video_height, video_width) / 3 self.grid_floor = create_grid([0, video_height, -z], [video_width, video_height, -z], [video_width, video_height, z], [0, video_height, z], 5, 2, color=(1, 1, 1)) self.grid_wall = create_grid([0, 0, z], [video_width, 0, z], [video_width, video_height, z], [0, video_height, z], 5, 2, color=(1, 1, 1)) self.vis3d.add_geometry(self.grid_floor) self.vis3d.add_geometry(self.grid_wall) view_control = self.vis3d.get_view_control() view_control.set_up(np.array([0, -1, 0])) view_control.set_front(np.array([0, 
0, -1])) if output is None: self.output = None else: fourcc = cv2.VideoWriter_fourcc(*"MJPG") self.output = cv2.VideoWriter(output, fourcc, self.video_fps, (video_width, video_height)) def load_models(self, pd_xml, pd_device, lm_xml, lm_device): print("Loading Inference Engine") self.ie = IECore() print("Device info:") versions = self.ie.get_versions(pd_device) print("{}{}".format(" " * 8, pd_device)) print("{}MKLDNNPlugin version ......... {}.{}".format( " " * 8, versions[pd_device].major, versions[pd_device].minor)) print("{}Build ........... {}".format( " " * 8, versions[pd_device].build_number)) # Pose detection model pd_name = os.path.splitext(pd_xml)[0] pd_bin = pd_name + '.bin' print( "Pose Detection model - Reading network files:\n\t{}\n\t{}".format( pd_xml, pd_bin)) self.pd_net = self.ie.read_network(model=pd_xml, weights=pd_bin) # Input blob: input - shape: [1, 3, 128, 128] # Output blob: classificators - shape: [1, 896, 1] : scores # Output blob: regressors - shape: [1, 896, 12] : bboxes self.pd_input_blob = next(iter(self.pd_net.input_info)) print( f"Input blob: {self.pd_input_blob} - shape: {self.pd_net.input_info[self.pd_input_blob].input_data.shape}" ) _, _, self.pd_h, self.pd_w = self.pd_net.input_info[ self.pd_input_blob].input_data.shape for o in self.pd_net.outputs.keys(): print(f"Output blob: {o} - shape: {self.pd_net.outputs[o].shape}") self.pd_scores = "classificators" self.pd_bboxes = "regressors" print("Loading pose detection model into the plugin") self.pd_exec_net = self.ie.load_network(network=self.pd_net, num_requests=1, device_name=pd_device) self.pd_infer_time_cumul = 0 self.pd_infer_nb = 0 self.infer_nb = 0 self.infer_time_cumul = 0 # Landmarks model if lm_device != pd_device: print("Device info:") versions = self.ie.get_versions(pd_device) print("{}{}".format(" " * 8, pd_device)) print("{}MKLDNNPlugin version ......... {}.{}".format( " " * 8, versions[pd_device].major, versions[pd_device].minor)) print("{}Build ........... 
{}".format( " " * 8, versions[pd_device].build_number)) lm_name = os.path.splitext(lm_xml)[0] lm_bin = lm_name + '.bin' print("Landmark model - Reading network files:\n\t{}\n\t{}".format( lm_xml, lm_bin)) self.lm_net = self.ie.read_network(model=lm_xml, weights=lm_bin) # Input blob: input_1 - shape: [1, 3, 256, 256] # Output blob: ld_3d - shape: [1, 195] for full body or [1, 155] for upper body # Output blob: output_poseflag - shape: [1, 1] # Output blob: output_segmentation - shape: [1, 1, 128, 128] self.lm_input_blob = next(iter(self.lm_net.input_info)) print( f"Input blob: {self.lm_input_blob} - shape: {self.lm_net.input_info[self.lm_input_blob].input_data.shape}" ) _, _, self.lm_h, self.lm_w = self.lm_net.input_info[ self.lm_input_blob].input_data.shape for o in self.lm_net.outputs.keys(): print(f"Output blob: {o} - shape: {self.lm_net.outputs[o].shape}") self.lm_score = "output_poseflag" self.lm_segmentation = "output_segmentation" self.lm_landmarks = "ld_3d" print("Loading landmark model to the plugin") self.lm_exec_net = self.ie.load_network(network=self.lm_net, num_requests=1, device_name=lm_device) self.lm_infer_time_cumul = 0 self.lm_infer_nb = 0 def pd_postprocess(self, inference): scores = np.squeeze(inference[self.pd_scores]) # 896 bboxes = inference[self.pd_bboxes][0] # 896x12 # Decode bboxes self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes, self.anchors, best_only=not self.multi_detection) # Non maximum suppression (not needed if best_only is True) if self.multi_detection: self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh) mpu.detections_to_rect(self.regions, kp_pair=[0, 1] if self.full_body else [2, 3]) mpu.rect_transformation(self.regions, self.frame_size, self.frame_size) def pd_render(self, frame): for r in self.regions: if self.show_pd_box: box = (np.array(r.pd_box) * self.frame_size).astype(int) cv2.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 2) if self.show_pd_kps: # Key point 0 - mid hip center # Key point 1 - point that encodes size & rotation (for full body) # Key point 2 - mid shoulder center # Key point 3 - point that encodes size & rotation (for upper body) if self.full_body: # Only kp 0 and 1 used list_kps = [0, 1] else: # Only kp 2 and 3 used for upper body list_kps = [2, 3] for kp in list_kps: x = int(r.pd_kps[kp][0] * self.frame_size) y = int(r.pd_kps[kp][1] * self.frame_size) cv2.circle(frame, (x, y), 3, (0, 0, 255), -1) cv2.putText(frame, str(kp), (x, y + 12), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2) if self.show_scores and r.pd_score is not None: cv2.putText(frame, f"Pose score: {r.pd_score:.2f}", (50, self.frame_size // 2), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2) def lm_postprocess(self, region, inference): region.lm_score = np.squeeze(inference[self.lm_score]) if region.lm_score > self.lm_score_threshold: self.nb_active_regions += 1 lm_raw = inference[self.lm_landmarks].reshape(-1, 5) # Each keypoint have 5 information: # - X,Y coordinates are local to the region of # interest and range from [0.0, 255.0]. # - Z coordinate is measured in "image pixels" like # the X and Y coordinates and represents the # distance relative to the plane of the subject's # hips, which is the origin of the Z axis. Negative # values are between the hips and the camera; # positive values are behind the hips. Z coordinate # scale is similar with X, Y scales but has different # nature as obtained not via human annotation, by # fitting synthetic data (GHUM model) to the 2D # annotation. 
# - Visibility, after user-applied sigmoid denotes the # probability that a keypoint is located within the # frame and not occluded by another bigger body # part or another object. # - Presence, after user-applied sigmoid denotes the # probability that a keypoint is located within the # frame. # Normalize x,y,z. Here self.lm_w = self.lm_h and scaling in z = scaling in x = 1/self.lm_w lm_raw[:, :3] /= self.lm_w # Apply sigmoid on visibility and presence (if used later) # lm_raw[:,3:5] = 1 / (1 + np.exp(-lm_raw[:,3:5])) # region.landmarks contains the landmarks normalized 3D coordinates in the relative oriented body bounding box region.landmarks = lm_raw[:, :3] # Calculate the landmark coordinate in square padded image (region.landmarks_padded) src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32) dst = np.array( [(x, y) for x, y in region.rect_points[1:]], dtype=np.float32 ) # region.rect_points[0] is left bottom point and points going clockwise! mat = cv2.getAffineTransform(src, dst) lm_xy = np.expand_dims(region.landmarks[:self.nb_lms, :2], axis=0) lm_xy = np.squeeze(cv2.transform(lm_xy, mat)) # A segment of length 1 in the coordinates system of body bounding box takes region.rect_w_a pixels in the # original image. Then I arbitrarily divide by 4 for a more realistic appearance. lm_z = region.landmarks[:self.nb_lms, 2:3] * region.rect_w_a / 4 lm_xyz = np.hstack((lm_xy, lm_z)) if self.smoothing: lm_xyz = self.filter.apply(lm_xyz) region.landmarks_padded = lm_xyz.astype(np.int) # If we added padding to make the image square, we need to remove this padding from landmark coordinates # region.landmarks_abs contains absolute landmark coordinates in the original image (padding removed)) region.landmarks_abs = region.landmarks_padded.copy() if self.pad_h > 0: region.landmarks_abs[:, 1] -= self.pad_h if self.pad_w > 0: region.landmarks_abs[:, 0] -= self.pad_w if self.use_gesture: self.recognize_gesture(region) if self.show_segmentation: self.seg = np.squeeze(inference[self.lm_segmentation]) self.seg = 1 / (1 + np.exp(-self.seg)) def lm_render(self, frame, region): if region.lm_score > self.lm_score_threshold: if self.show_segmentation: ret, mask = cv2.threshold(self.seg, 0.5, 1, cv2.THRESH_BINARY) mask = (mask * 255).astype(np.uint8) cv2.imshow("seg", self.seg) # cv2.imshow("mask", mask) src = np.array( [[0, 0], [128, 0], [128, 128]], dtype=np.float32) # rect_points[0] is left bottom point ! 
dst = np.array(region.rect_points[1:], dtype=np.float32) mat = cv2.getAffineTransform(src, dst) mask = cv2.warpAffine(mask, mat, (self.frame_size, self.frame_size)) # cv2.imshow("mask2", mask) # mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) l = frame.shape[0] frame2 = cv2.bitwise_and(frame, frame, mask=mask) if not self.crop: frame2 = frame2[self.pad_h:l - self.pad_h, self.pad_w:l - self.pad_w] cv2.imshow("Segmentation", frame2) if self.show_rot_rect: cv2.polylines(frame, [np.array(region.rect_points)], True, (0, 255, 255), 2, cv2.LINE_AA) if self.show_landmarks: list_connections = LINES_FULL_BODY if self.full_body else LINES_UPPER_BODY lines = [ np.array( [region.landmarks_padded[point, :2] for point in line]) for line in list_connections ] cv2.polylines(frame, lines, False, (255, 180, 90), 2, cv2.LINE_AA) for i, x_y in enumerate(region.landmarks_padded[:self.nb_lms - 2, :2]): if i > 10: color = (0, 255, 0) if i % 2 == 0 else (0, 0, 255) elif i == 0: color = (0, 255, 255) elif i in [4, 5, 6, 8, 10]: color = (0, 255, 0) else: color = (0, 0, 255) cv2.circle(frame, (x_y[0], x_y[1]), 4, color, -11) if self.show_3d: points = region.landmarks_abs lines = LINE_MESH_FULL_BODY if self.full_body else LINE_MESH_UPPER_BODY colors = COLORS_FULL_BODY for i, a_b in enumerate(lines): a, b = a_b line = create_segment(points[a], points[b], radius=5, color=colors[i]) if line: self.vis3d.add_geometry(line, reset_bounding_box=False) if self.show_scores: cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}", (region.landmarks_padded[24, 0] - 10, region.landmarks_padded[24, 1] + 90), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2) if self.use_gesture and self.show_gesture: cv2.putText(frame, region.gesture, (region.landmarks_padded[6, 0] - 10, region.landmarks_padded[6, 1] - 50), cv2.FONT_HERSHEY_PLAIN, 5, (0, 1190, 255), 3) def recognize_gesture(self, r): def angle_with_y(v): # v: 2d vector (x,y) # Returns angle in degree ofv with y-axis of image plane if v[1] == 0: return 90 angle = atan2(v[0], v[1]) return np.degrees(angle) # For the demo, we want to recognize the flag semaphore alphabet # For this task, we just need to measure the angles of both arms with vertical right_arm_angle = angle_with_y(r.landmarks_abs[14, :2] - r.landmarks_abs[12, :2]) left_arm_angle = angle_with_y(r.landmarks_abs[13, :2] - r.landmarks_abs[11, :2]) right_pose = int((right_arm_angle + 202.5) / 45) left_pose = int((left_arm_angle + 202.5) / 45) r.gesture = semaphore_flag.get((right_pose, left_pose), None) def run(self): self.fps = FPS(mean_nb_frames=20) nb_frames = 0 nb_pd_inferences = 0 nb_pd_inferences_direct = 0 nb_lm_inferences = 0 nb_lm_inferences_after_landmarks_ROI = 0 glob_pd_rtrip_time = 0 glob_lm_rtrip_time = 0 get_new_frame = True use_previous_landmarks = False global_time = time.perf_counter() while True: if get_new_frame: nb_frames += 1 if self.input_type == "image": vid_frame = self.img else: ok, vid_frame = self.cap.read() if not ok: break h, w = vid_frame.shape[:2] if self.crop: # Cropping the long side to get a square shape self.frame_size = min(h, w) dx = (w - self.frame_size) // 2 dy = (h - self.frame_size) // 2 video_frame = vid_frame[dy:dy + self.frame_size, dx:dx + self.frame_size] else: # Padding on the small side to get a square shape self.frame_size = max(h, w) self.pad_h = int((self.frame_size - h) / 2) self.pad_w = int((self.frame_size - w) / 2) video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h, self.pad_h, self.pad_w, self.pad_w, cv2.BORDER_CONSTANT) annotated_frame = video_frame.copy() if not 
self.force_detection and use_previous_landmarks: self.regions = regions_from_landmarks mpu.detections_to_rect( self.regions, kp_pair=[0, 1] ) # self.regions.pd_kps are initialized from landmarks on previous frame mpu.rect_transformation(self.regions, self.frame_size, self.frame_size) else: # Infer pose detection # Resize image to NN square input shape frame_nn = cv2.resize(video_frame, (self.pd_w, self.pd_h), interpolation=cv2.INTER_AREA) # Transpose hxwx3 -> 1x3xhxw frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ] pd_rtrip_time = now() inference = self.pd_exec_net.infer( inputs={self.pd_input_blob: frame_nn}) glob_pd_rtrip_time += now() - pd_rtrip_time self.pd_postprocess(inference) self.pd_render(annotated_frame) nb_pd_inferences += 1 if get_new_frame: nb_pd_inferences_direct += 1 # Landmarks self.nb_active_regions = 0 if self.show_3d: self.vis3d.clear_geometries() self.vis3d.add_geometry(self.grid_floor, reset_bounding_box=False) self.vis3d.add_geometry(self.grid_wall, reset_bounding_box=False) if self.force_detection: for r in self.regions: frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_w, self.lm_h) # Transpose hxwx3 -> 1x3xhxw frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ] # Get landmarks lm_rtrip_time = now() inference = self.lm_exec_net.infer( inputs={self.lm_input_blob: frame_nn}) glob_lm_rtrip_time += now() - lm_rtrip_time nb_lm_inferences += 1 self.lm_postprocess(r, inference) self.lm_render(annotated_frame, r) elif len(self.regions) == 1: r = self.regions[0] frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_w, self.lm_h) # Transpose hxwx3 -> 1x3xhxw frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ] # Get landmarks lm_rtrip_time = now() inference = self.lm_exec_net.infer( inputs={self.lm_input_blob: frame_nn}) glob_lm_rtrip_time += now() - lm_rtrip_time nb_lm_inferences += 1 if use_previous_landmarks: nb_lm_inferences_after_landmarks_ROI += 1 self.lm_postprocess(r, inference) if not self.force_detection: if get_new_frame: if not use_previous_landmarks: # With a new frame, we have run the landmark NN on a ROI found by the detection NN... if r.lm_score > self.lm_score_threshold: # ...and succesfully found a body and its landmarks # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y) regions_from_landmarks = [ mpu.Region(pd_kps=r.landmarks_padded[ self.nb_lms - 2:self.nb_lms, :2] / self.frame_size) ] use_previous_landmarks = True else: # With a new frame, we have run the landmark NN on a ROI calculated from the landmarks of the previous frame... if r.lm_score > self.lm_score_threshold: # ...and succesfully found a body and its landmarks # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y) regions_from_landmarks = [ mpu.Region(pd_kps=r.landmarks_padded[ self.nb_lms - 2:self.nb_lms, :2] / self.frame_size) ] use_previous_landmarks = True else: # ...and could not find a body # We don't know if it is because the ROI calculated from the previous frame is not reliable (the body moved) # or because there is really no body in the frame. To decide, we have to run the detection NN on this frame get_new_frame = False use_previous_landmarks = False continue else: # On a frame on which we already ran the landmark NN without founding a body, # we have run the detection NN... 
if r.lm_score > self.lm_score_threshold: # ...and succesfully found a body and its landmarks use_previous_landmarks = True # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y) regions_from_landmarks = [ mpu.Region(pd_kps=r.landmarks_padded[ self.nb_lms - 2:self.nb_lms, :2] / self.frame_size) ] use_previous_landmarks = True # else: # ...and could not find a body # We are sure there is no body in that frame get_new_frame = True self.lm_render(annotated_frame, r) else: # Detection NN hasn't found any body get_new_frame = True self.fps.update() if self.show_3d: self.vis3d.poll_events() self.vis3d.update_renderer() if self.smoothing and self.nb_active_regions == 0: self.filter.reset() if not self.crop: annotated_frame = annotated_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w] if self.show_fps: self.fps.display(annotated_frame, orig=(50, 50), size=1, color=(240, 180, 100)) cv2.imshow("Blazepose", annotated_frame) if self.output: self.output.write(annotated_frame) key = cv2.waitKey(1) if key == ord('q') or key == 27: break elif key == 32: # Pause on space bar cv2.waitKey(0) elif key == ord('1'): self.show_pd_box = not self.show_pd_box elif key == ord('2'): self.show_pd_kps = not self.show_pd_kps elif key == ord('3'): self.show_rot_rect = not self.show_rot_rect elif key == ord('4'): self.show_landmarks = not self.show_landmarks elif key == ord('5'): self.show_scores = not self.show_scores elif key == ord('6'): self.show_gesture = not self.show_gesture elif key == ord('f'): self.show_fps = not self.show_fps elif key == ord('s'): self.show_segmentation = not self.show_segmentation # Print some stats print( f"FPS : {nb_frames/(time.perf_counter() - global_time):.1f} f/s (# frames = {nb_frames})" ) print( f"# pose detection inferences : {nb_pd_inferences} - # direct: {nb_pd_inferences_direct} - # after landmarks ROI failures: {nb_pd_inferences-nb_pd_inferences_direct}" ) print( f"# landmark inferences : {nb_lm_inferences} - # after pose detection: {nb_lm_inferences - nb_lm_inferences_after_landmarks_ROI} - # after landmarks ROI prediction: {nb_lm_inferences_after_landmarks_ROI}" ) print( f"Pose detection round trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms" ) if nb_lm_inferences: print( f"Landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms" ) if self.output: self.output.release()
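# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original file): the run() loop above
# alternates between the pose-detection NN and a ROI predicted from the last
# two landmarks of the previous frame, falling back to detection when the
# landmark score drops. The helper below isolates that decision logic; the
# callables detect_body / infer_landmarks / roi_from_landmarks are hypothetical
# stand-ins for the pd_* and lm_* steps above.
def track_one_frame(frame, prev_roi, detect_body, infer_landmarks,
                    roi_from_landmarks, lm_score_threshold=0.7):
    """Return (landmarks, roi_for_next_frame); (None, None) if no body found."""
    if prev_roi is not None:
        lms, score = infer_landmarks(frame, prev_roi)
        if score > lm_score_threshold:
            return lms, roi_from_landmarks(lms)   # tracking holds, detector skipped
    roi = detect_body(frame)                      # first frame or tracking failed
    if roi is None:
        return None, None                         # really no body in this frame
    lms, score = infer_landmarks(frame, roi)
    if score > lm_score_threshold:
        return lms, roi_from_landmarks(lms)
    return None, None
# ---------------------------------------------------------------------------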
class VideoProcessor(object): # # Initialization of Video Processor Class # Reads video frame from Video Stream class and process (AI Inference etc) # Set frame data to displayFrame for visualization # def __init__(self, videoPath = '/dev/video0', videoW = 1024, videoH = 768, fontScale = 1.0, verbose = True): self.verbose = verbose self._debug = False if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) logging.info('===============================================================') logging.info('Initializing Video Processor with the following parameters:') logging.info(' - OpenCV Version : {}'.format(cv2.__version__)) logging.info(' - Device Path : {}'.format(videoPath)) logging.info(' - Frame Size : {}x{}'.format(videoW, videoH)) logging.info('===============================================================') # To send message to clients (Browser) self.imageStreamHandler = None self.threadExecutor = None # Video source self.videoData = Video_Data(self, videoPath) self.displayFrame = np.array([]) self.frame_org = np.array([]) # for Frame Rate measurement self.fps = FPS() playback_mode = self.videoData.get_playback_mode() self._playback_sync = (playback_mode == Video_Playback_Mode.Sync) self._fps_target = 30 self._fps_wait = 1000.0/30 self.currentFps = 30.0 # For display self._fontScale = float(fontScale) self._annotate = False # Track states of this object self.set_video_processor_state(VideoProcessorState.Unknown) # OpenVINO self.inference_engine = None self.runInference = 0 self.ioLoop = None self.current_model_data = None # # Sets up Video Processor Class # Creates Video Stream Class for video capture # def __enter__(self): # async def __aenter__(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) self.set_video_path('{{\"set_video_path\":\"{}\"}}'.format(self.videoData.videoPath)) self.inference_engine = OpenVINO_Engine(self) # with OpenVINO_Util() as openVino: # devices = openVino.get_supported_devices() # for device in devices: # logging.info('>> Device : {0}'.format(device)) # fullName = openVino.get_device_name(device) # logging.info('>> Name : {0}'.format(fullName)) # self.inference_engine.hwList.append(device) self.inference_engine.initialize_engine() return self # # Clean up Video Processor Class # def __exit__(self, exception_type, exception_value, traceback): # async def __aexit__(self, exception_type, exception_value, traceback): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) if self.threadExecutor: self.threadExecutor.shutdown(wait=True) self.set_video_processor_state(VideoProcessorState.Stop) # # Send message to browser # def send_message(self, msg): if self.imageStreamHandler: ImageStreamHandler.broadcast_message(msg) # # Set Video Processor State flag # def set_video_processor_state(self, flag): self._state = flag # # Initializes Video Source # def _init_video_source(self): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) raise NotImplementedError # # Sets current video frame for display # def set_display_frame(self, frame): assert frame.size > 0, "Frame Empty" self.displayFrame = frame # # Resturns current video frame for display # Converts to byte data # def get_display_frame(self): if self.displayFrame.size == 0: if self.videoData.get_video_data_state() == Video_Data_State.PhotoReady: if self.videoData.videoH == 0 or 
self.videoData.videoW == 0: wallpaper = np.zeros((720, 1280, 3), np.uint8) else: wallpaper = np.zeros((self.videoData.videoH, self.videoData.videoW, 3), np.uint8) ret, buffer = cv2.imencode( '.jpg', wallpaper ) else: return None, 0 else: ret, buffer = cv2.imencode( '.jpg', self.displayFrame ) if ret and buffer.size > 0: return buffer.tobytes(), self.currentFps else: assert(False), '>> Display Frame Empty *************** ' # # Resturns Inference Engine Info # def get_inference_engine_info(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) if self.inference_engine: devices = json.dumps(self.inference_engine.get_devices()) if self.runInference == 1: state = "On" else: state = "Off" return '{{\"{0}\":\"{1}\",\"devices\":{2},\"get_inference_state\":\"{3}\"}}'.format(sys._getframe().f_code.co_name, self.inference_engine.signature, devices, state) else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Retrieve a list of models # def get_model_list(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) if self.inference_engine: json_data = json.loads('{\"get_model_list\":[]}') model_list = self.inference_engine.get_model_list() for model in model_list: json_data["get_model_list"].append(json.loads(model.to_json())) else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) return json_data # # Set to keep FPS for video or not # def playback_mode(self, msg): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) jsonData = json.loads(msg) playback_mode = jsonData["playback_mode"] self._playback_sync = playback_mode == "0" self.videoData.set_playback_mode(playback_mode) return '{{\"playback_mode\":\"{0}\"}}'.format(self.videoData.get_playback_mode()) # # Stop video process # def set_video_playback(self, msg): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) jsonData = json.loads(msg) if jsonData['set_video_playback'] == "1": self.set_video_processor_state(VideoProcessorState.Running) else: self.set_video_processor_state(VideoProcessorState.Pause) return self.get_video_playback() # # Return current video playback state # def get_video_playback(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) if self._state == VideoProcessorState.Pause: state = "0" elif self._state == VideoProcessorState.Running: state = "1" else: assert False, "Unexpected Video Processor State" return '{{\"get_video_playback\":\"{}\"}}'.format(state) # # Stop video process # def set_video_stop(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) self.videoData.set_video_playback(isPause = True) self.set_video_processor_state(VideoProcessorState.Pause) # # Start video process # def set_video_start(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) self.fps.reset(self.videoData.get_video_fps()) self.videoData.set_video_playback(isPause = False) self.set_video_processor_state(VideoProcessorState.Running) 
self.send_message('{\"frame_ready\":1}') # # Set Video Resolution # def set_video_path(self, msg, loop = None): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) jsonData = json.loads(msg) if jsonData.get("set_video_path"): videoPath = jsonData["set_video_path"] else: videoPath = jsonData["videoPath"] self.set_video_processor_state(VideoProcessorState.Pause) video_data_state = self.videoData.set_video_path(videoPath, loop) if video_data_state == Video_Data_State.Running or video_data_state == Video_Data_State.PhotoReady: self.fps.reset(self.videoData.get_video_fps()) self.set_video_start() else: self.set_video_processor_state(VideoProcessorState.Pause) return self.get_video_path() # # Return current video path # def get_video_path(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) return self.videoData.get_video_path() # # Set Video Resolution # def set_video_resolution(self, msg): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) return self.videoData.set_video_resolution(msg) # # Get Video Resolution # def get_video_resolution(self): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) return self.videoData.get_video_resolution() # # Set AI model to use # async def set_ai_model(self, loop, msg): if self.verbose: logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg)) try: self.ioLoop = loop #1 Get Model Data model_data = self.inference_engine.get_ai_model_data(msg) current_hw = json.loads(self.inference_engine.get_target_device()) current_precision = json.loads(self.inference_engine.get_precision()) if model_data.isFlagSet(Model_Flag.Loaded): json_data = json.loads(msg) device = json_data["set_target_device"] precision = json_data["set_precision"] if current_hw['get_target_device'] == device and current_precision['get_precision'] == precision: logging.info(">> Model {} is loaded to {}".format(model_data.modelName, current_hw)) self.runInference = 1 self.send_message('{{\"set_ai_model\":\"Running {}\",\"isComplete\":1}}'.format(model_data.modelName)) else: if self.current_model_data: self.current_model_data.clearFlag(Model_Flag.Loaded) self.current_model_data = None if not model_data is None: self.set_device_params(msg) # self.set_precision(msg) # self.set_target_device(msg) # create a task to download model from model zoo self.set_video_processor_state(VideoProcessorState.Pause) self.send_message('{{\"set_ai_model\":\"Downloading {}\"}}'.format(model_data.modelName)) task = self.ioLoop.run_in_executor(None, self.inference_engine.download_model, model_data) task.add_done_callback(self.model_download_callback) else: json_data = json.loads(msg) self.send_message('{{\"set_ai_model\":\"Failed to get model data for {}\",\"isFailure\":1}}'.format(json_data["SetAiModel"])) except CancelledError: logging.info('-- {0}() - Cancelled'.format(sys._getframe().f_code.co_name)) except Exception as ex: exc_type, exc_obj, exc_tb = sys.exc_info() traceback.print_exception(exc_type, exc_obj, exc_tb) logging.error('!! 
{0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex)) # # Callback function for model download # def model_download_callback(self, future): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) model_data = future.result() assert(model_data is not None, "Model Data is None") if model_data.isFlagSet(Model_Flag.Downloaded): self.send_message('{{\"set_ai_model\":\"{} downloaded. Converting to IR\"}}'.format(model_data.modelName)) if model_data.framework == 'dldt': task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, model_data) task.add_done_callback(self.model_load_callback) else: task = self.ioLoop.run_in_executor(None, self.inference_engine.convert_model, model_data) task.add_done_callback(self.model_convert_callback) else: self.set_video_start() self.send_message('{{\"set_ai_model\":\"Download failed {}\",\"isFailure\":1}}'.format(model_data.errorMsg)) # # Callback function for model conversion # def model_convert_callback(self, future): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) model_data = future.result() if model_data.isFlagSet(Model_Flag.Converted): logging.info(' FP16 {}'.format(str(model_data.ir_dir['FP16']))) logging.info(' FP32 {}'.format(str(model_data.ir_dir['FP32']))) self.send_message('{{\"set_ai_model\":\"{} converted to IR.\\nLoading....\", \"isSuccess\":1}}'.format(model_data.modelName)) self.inference_engine.remove_model_dir(model_data) task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, model_data) task.add_done_callback(self.model_load_callback) else: self.set_video_start() self.send_message('{{\"set_ai_model\":\"Convert Failed : {}\",\"isFailure\":1}}'.format(model_data.errorMsg)) # # Callback function for model load # def model_load_callback(self, future): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) model_data = future.result() self.set_video_start() if model_data.isFlagSet(Model_Flag.Loaded): target_device = json.loads(self.inference_engine.get_target_device()) self.send_message('{{\"set_ai_model\":\"Successfully loaded {}\", \"isComplete\":1}}'.format(model_data.modelName)) self.send_message('{{\"get_inference_engine_info\":\"{} running on {}\"}}'.format(self.inference_engine.signature, target_device['get_target_device'])) self.current_model_data = model_data else: self.send_message('{{\"set_ai_model\":\"Load failed : {}\",\"isFailure\":1}}'.format(model_data.errorMsg)) # # Set hardware to run inference on # def set_device_params(self, msg, reload = False): if self.verbose: logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg)) if self.inference_engine: self.inference_engine.set_target_device(msg) self.inference_engine.set_precision(msg) if reload == True and self.current_model_data: # create a task to download model from model zoo self.set_video_processor_state(VideoProcessorState.Pause) self.send_message('{{\"set_ai_model\":\"Loading {}\"}}'.format(self.current_model_data.modelName)) task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, self.current_model_data) task.add_done_callback(self.model_download_callback) return self.inference_engine.set_target_device(msg) else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", 
\"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Set hardware to run inference on # # def set_target_device(self, msg): # if self.verbose: # logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg)) # if self.inference_engine: # self.inference_engine.set_target_device(msg) # self.inference_engine.set_precision(msg) # if self.current_model_data: # # create a task to download model from model zoo # self.set_video_processor_state(VideoProcessorState.Pause # self.send_message('{{\"set_ai_model\":\"Loading {}\"}}'.format(self.current_model_data.modelName)) # task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, self.current_model_data) # task.add_done_callback(self.model_download_callback) # return self.inference_engine.set_target_device(msg) # else: # assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) # return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Return hardware to run inference on # def get_target_device(self): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) if self.inference_engine: return self.inference_engine.get_target_device() else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Set Inference Precision # # def set_precision(self, msg): # if self.verbose: # logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg)) # if self.inference_engine: # self.inference_engine.set_precision(msg) # if self.current_model_data: # # create a task to download model from model zoo # self.set_video_processor_state(VideoProcessorState.Pause # self.send_message('{{\"set_ai_model\":\"Loading {}\"}}'.format(self.current_model_data.modelName)) # task = self.ioLoop.run_in_executor(None, self.inference_engine.load_model, self.current_model_data) # task.add_done_callback(self.model_download_callback) # return self.get_precision() # else: # assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) # return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Get Inference Precision # def get_precision(self): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) if self.inference_engine: return self.inference_engine.get_precision() else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Set Confidence Level threshold # def set_confidence_level(self, msg): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) jsonData = json.loads(msg) confidenceLevel = int(jsonData["set_confidence_level"].replace('%','')) if self.inference_engine: return self.inference_engine.set_confidence_level(confidenceLevel) else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Return Confidence Level threshold # def get_confidence_level(self): if self._debug: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, 
sys._getframe().f_code.co_name)) if self.inference_engine: return self.inference_engine.get_confidence_level() else: assert False, '>> {} : Inference Engine Not Set'.format(sys._getframe().f_code.co_name) return '{{\"{}\":\"Inference Engine Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Set Inference State # def set_inference_state(self, msg): if self.verbose: logging.info('>> {0}:{1}() {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, msg)) jsonData = json.loads(msg) inferenceState = jsonData["set_inference_state"] if self.current_model_data: #make sure model is loaded if not self.current_model_data.isFlagSet(Model_Flag.Loaded): return '{{\"{}\":\"{} is not loaded\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name, self.current_model_data.modelName) else: self.runInference = int(inferenceState) return self.get_inference_state() else: return '{{\"{}\":\"Model Data Not Set\", \"isFailure\":1}}'.format(sys._getframe().f_code.co_name) # # Get Current Inference State # def get_inference_state(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) return '{{\"{}\":\"{}\"}}'.format(sys._getframe().f_code.co_name, self.runInference) async def process_video_frame_async(self, executor): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) try: loop = asyncio.get_event_loop() task = await loop.run_in_executor(executor, self.process_video_frame) return task except CancelledError: logging.info('-- {0}() - Cancelled'.format(sys._getframe().f_code.co_name)) self.set_video_processor_state(VideoProcessorState.Stop) return 0 except Exception as ex: exc_type, exc_obj, exc_tb = sys.exc_info() traceback.print_exception(exc_type, exc_obj, exc_tb) logging.error('!! 
{0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex)) return 1 # # Saves frame data to a file # def save_image(self): cv2.imwrite("./frame.png", self.displayFrame) #cv2.imwrite("./frame.png", self.frame_org) # # Process Video Frame # def process_video_frame(self): if self.verbose: logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) textX, textY = cv2.getTextSize("FPS", cv2.FONT_HERSHEY_DUPLEX, self._fontScale, 1)[0] textX = int(textX * self._fontScale * 1.1) textY = int(textY * self._fontScale * 1.1) frame = np.array([]) self.fps.reset(self.videoData.get_video_fps()) while True: try: if self._state == VideoProcessorState.Stop: logging.info('>> {0}:{1}() : Stop Video Processor'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) break if self._state == VideoProcessorState.Pause: if self._debug: logging.info('>> {0}:{1}() : Pause Video Processor'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) time.sleep(1) continue if self.videoData is None: logging.info('>> {0}:{1}() : No Video Data'.format(self.__class__.__name__, sys._getframe().f_code.co_name)) time.sleep(0.5) continue grabbed, frame = self.videoData.read_frame_queue() if self._debug: logging.info("Grabbed {} frame size {}".format(grabbed, frame.size)) # if (grabbed == False or frame.size == 0): if (grabbed == False): time.sleep(1/30) continue else: self.frame_org = np.copy(frame) if self.runInference == 1: # Run Inference frame = self.inference_engine.inference(frame) if self._annotate: fps_annotation = 'FPS : {}'.format(self.currentFps) cv2.putText( frame, fps_annotation, (10, textY + 10), cv2.FONT_HERSHEY_SIMPLEX, self._fontScale, (0,0,255), 2) self.set_display_frame(frame) self.currentFps = self.fps.fps(self._playback_sync) except Exception as ex: exc_type, exc_obj, exc_tb = sys.exc_info() traceback.print_exception(exc_type, exc_obj, exc_tb) logging.error('!! {0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex)) if self.verbose: logging.info('<< {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
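# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original file): VideoProcessor offloads the
# model download / convert / load steps with loop.run_in_executor() and chains
# them through add_done_callback() so the frame loop keeps running. Minimal
# standalone version of that pattern; download_model/load_model are placeholders
# standing in for the corresponding OpenVINO_Engine methods.
import asyncio
import time


def download_model(name):
    time.sleep(0.2)                 # stands in for the real download
    return name


def load_model(name):
    time.sleep(0.2)                 # stands in for the real IR load
    return f"{name} (loaded)"


def on_downloaded(loop, future):
    # Like model_download_callback(): schedule the next blocking step in the
    # executor instead of blocking the event loop.
    loop.run_in_executor(None, load_model, future.result())


async def set_model(loop, name):
    task = loop.run_in_executor(None, download_model, name)
    task.add_done_callback(lambda fut: on_downloaded(loop, fut))
    await task


if __name__ == "__main__":
    loop = asyncio.new_event_loop()
    loop.run_until_complete(set_model(loop, "person-detection"))
    loop.run_until_complete(asyncio.sleep(0.5))   # let the chained load finish
    loop.close()
# ---------------------------------------------------------------------------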
class HandTracker: def __init__(self, input_file=None, pd_path="models/palm_detection.blob", pd_score_thresh=0.5, pd_nms_thresh=0.3, use_lm=True, lm_path="models/hand_landmark.blob", lm_score_threshold=0.5, use_gesture=False): self.camera = input_file is None self.pd_path = pd_path self.pd_score_thresh = pd_score_thresh self.pd_nms_thresh = pd_nms_thresh self.use_lm = use_lm self.lm_path = lm_path self.lm_score_threshold = lm_score_threshold self.use_gesture = use_gesture if not self.camera: if input_file.endswith('.jpg') or input_file.endswith('.png') : self.image_mode = True self.img = cv2.imread(input_file) self.video_size = np.min(self.img.shape[:2]) else: self.image_mode = False self.cap = cv2.VideoCapture(input_file) width = self.cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float `width` height = self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float `height` self.video_size = int(min(width, height)) # Create SSD anchors # https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt anchor_options = mpu.SSDAnchorOptions(num_layers=4, min_scale=0.1484375, max_scale=0.75, input_size_height=128, input_size_width=128, anchor_offset_x=0.5, anchor_offset_y=0.5, strides=[8, 16, 16, 16], aspect_ratios= [1.0], reduce_boxes_in_lowest_layer=False, interpolated_scale_aspect_ratio=1.0, fixed_anchor_size=True) self.anchors = mpu.generate_anchors(anchor_options) self.nb_anchors = self.anchors.shape[0] print(f"{self.nb_anchors} anchors have been created") # Rendering flags if self.use_lm: self.show_pd_box = False self.show_pd_kps = False self.show_rot_rect = False self.show_handedness = False self.show_landmarks = True self.show_scores = False self.show_gesture = self.use_gesture else: self.show_pd_box = True self.show_pd_kps = False self.show_rot_rect = False self.show_scores = False def create_pipeline(self): print("Creating pipeline...") # Start defining a pipeline pipeline = dai.Pipeline() self.pd_input_length = 128 if self.camera: # ColorCamera print("Creating Color Camera...") cam = pipeline.createColorCamera() cam.setPreviewSize(self.pd_input_length, self.pd_input_length) cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) # Crop video to square shape (palm detection takes square image as input) self.video_size = min(cam.getVideoSize()) cam.setVideoSize(self.video_size, self.video_size) cam.setFps(30) cam.setInterleaved(False) cam.setBoardSocket(dai.CameraBoardSocket.RGB) cam_out = pipeline.createXLinkOut() cam_out.setStreamName("cam_out") # Link video output to host for higher resolution cam.video.link(cam_out.input) # Define palm detection model print("Creating Palm Detection Neural Network...") pd_nn = pipeline.createNeuralNetwork() pd_nn.setBlobPath(str(Path(self.pd_path).resolve().absolute())) # Increase threads for detection # pd_nn.setNumInferenceThreads(2) # Specify that network takes latest arriving frame in non-blocking manner # Palm detection input if self.camera: pd_nn.input.setQueueSize(1) pd_nn.input.setBlocking(False) cam.preview.link(pd_nn.input) else: pd_in = pipeline.createXLinkIn() pd_in.setStreamName("pd_in") pd_in.out.link(pd_nn.input) # Palm detection output pd_out = pipeline.createXLinkOut() pd_out.setStreamName("pd_out") pd_nn.out.link(pd_out.input) # Define hand landmark model if self.use_lm: print("Creating Hand Landmark Neural Network...") lm_nn = pipeline.createNeuralNetwork() lm_nn.setBlobPath(str(Path(self.lm_path).resolve().absolute())) lm_nn.setNumInferenceThreads(1) # Hand landmark input self.lm_input_length = 
224 lm_in = pipeline.createXLinkIn() lm_in.setStreamName("lm_in") lm_in.out.link(lm_nn.input) # Hand landmark output lm_out = pipeline.createXLinkOut() lm_out.setStreamName("lm_out") lm_nn.out.link(lm_out.input) print("Pipeline created.") return pipeline def pd_postprocess(self, inference): scores = np.array(inference.getLayerFp16("classificators"), dtype=np.float16) # 896 bboxes = np.array(inference.getLayerFp16("regressors"), dtype=np.float16).reshape((self.nb_anchors,18)) # 896x18 # Decode bboxes self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes, self.anchors) # Non maximum suppression self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh) if self.use_lm: mpu.detections_to_rect(self.regions) mpu.rect_transformation(self.regions, self.video_size, self.video_size) def pd_render(self, frame): for r in self.regions: if self.show_pd_box: box = (np.array(r.pd_box) * self.video_size).astype(int) cv2.rectangle(frame, (box[0], box[1]), (box[0]+box[2], box[1]+box[3]), (0,255,0), 2) if self.show_pd_kps: for i,kp in enumerate(r.pd_kps): x = int(kp[0] * self.video_size) y = int(kp[1] * self.video_size) cv2.circle(frame, (x, y), 6, (0,0,255), -1) cv2.putText(frame, str(i), (x, y+12), cv2.FONT_HERSHEY_PLAIN, 1.5, (0,255,0), 2) if self.show_scores: cv2.putText(frame, f"Palm score: {r.pd_score:.2f}", (int(r.pd_box[0] * self.video_size+10), int((r.pd_box[1]+r.pd_box[3])*self.video_size+60)), cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2) def recognize_gesture(self, r): # Finger states # state: -1=unknown, 0=close, 1=open d_3_5 = mpu.distance(r.landmarks[3], r.landmarks[5]) d_2_3 = mpu.distance(r.landmarks[2], r.landmarks[3]) angle0 = mpu.angle(r.landmarks[0], r.landmarks[1], r.landmarks[2]) angle1 = mpu.angle(r.landmarks[1], r.landmarks[2], r.landmarks[3]) angle2 = mpu.angle(r.landmarks[2], r.landmarks[3], r.landmarks[4]) r.thumb_angle = angle0+angle1+angle2 if angle0+angle1+angle2 > 460 and d_3_5 / d_2_3 > 1.2: r.thumb_state = 1 else: r.thumb_state = 0 if r.landmarks[8][1] < r.landmarks[7][1] < r.landmarks[6][1]: r.index_state = 1 elif r.landmarks[6][1] < r.landmarks[8][1]: r.index_state = 0 else: r.index_state = -1 if r.landmarks[12][1] < r.landmarks[11][1] < r.landmarks[10][1]: r.middle_state = 1 elif r.landmarks[10][1] < r.landmarks[12][1]: r.middle_state = 0 else: r.middle_state = -1 if r.landmarks[16][1] < r.landmarks[15][1] < r.landmarks[14][1]: r.ring_state = 1 elif r.landmarks[14][1] < r.landmarks[16][1]: r.ring_state = 0 else: r.ring_state = -1 if r.landmarks[20][1] < r.landmarks[19][1] < r.landmarks[18][1]: r.little_state = 1 elif r.landmarks[18][1] < r.landmarks[20][1]: r.little_state = 0 else: r.little_state = -1 # Gesture if r.thumb_state == 1 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 1 and r.little_state == 1: r.gesture = "FIVE" elif r.thumb_state == 0 and r.index_state == 0 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0: r.gesture = "FIST" elif r.thumb_state == 1 and r.index_state == 0 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0: r.gesture = "OK" elif r.thumb_state == 0 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 0 and r.little_state == 0: r.gesture = "PEACE" elif r.thumb_state == 0 and r.index_state == 1 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0: r.gesture = "ONE" elif r.thumb_state == 1 and r.index_state == 1 and r.middle_state == 0 and r.ring_state == 0 and r.little_state == 0: r.gesture = "TWO" elif r.thumb_state == 1 and 
r.index_state == 1 and r.middle_state == 1 and r.ring_state == 0 and r.little_state == 0: r.gesture = "THREE" elif r.thumb_state == 0 and r.index_state == 1 and r.middle_state == 1 and r.ring_state == 1 and r.little_state == 1: r.gesture = "FOUR" else: r.gesture = None def lm_postprocess(self, region, inference): region.lm_score = inference.getLayerFp16("Identity_1")[0] region.handedness = inference.getLayerFp16("Identity_2")[0] lm_raw = np.array(inference.getLayerFp16("Identity_dense/BiasAdd/Add")) lm = [] for i in range(int(len(lm_raw)/3)): # x,y,z -> x/w,y/h,z/w (here h=w) lm.append(lm_raw[3*i:3*(i+1)]/self.lm_input_length) region.landmarks = lm if self.use_gesture: self.recognize_gesture(region) def lm_render(self, frame, region): if region.lm_score > self.lm_score_threshold: if self.show_rot_rect: cv2.polylines(frame, [np.array(region.rect_points)], True, (0,255,255), 2, cv2.LINE_AA) if self.show_landmarks: src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32) dst = np.array([ (x, y) for x,y in region.rect_points[1:]], dtype=np.float32) # region.rect_points[0] is left bottom point ! mat = cv2.getAffineTransform(src, dst) lm_xy = np.expand_dims(np.array([(l[0], l[1]) for l in region.landmarks]), axis=0) lm_xy = np.squeeze(cv2.transform(lm_xy, mat)).astype(np.int) list_connections = [[0, 1, 2, 3, 4], [0, 5, 6, 7, 8], [5, 9, 10, 11, 12], [9, 13, 14 , 15, 16], [13, 17], [0, 17, 18, 19, 20]] lines = [np.array([lm_xy[point] for point in line]) for line in list_connections] cv2.polylines(frame, lines, False, (255, 0, 0), 2, cv2.LINE_AA) if self.use_gesture: # color depending on finger state (1=open, 0=close, -1=unknown) color = { 1: (0,255,0), 0: (0,0,255), -1:(0,255,255)} radius = 6 cv2.circle(frame, (lm_xy[0][0], lm_xy[0][1]), radius, color[-1], -1) for i in range(1,5): cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.thumb_state], -1) for i in range(5,9): cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.index_state], -1) for i in range(9,13): cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.middle_state], -1) for i in range(13,17): cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.ring_state], -1) for i in range(17,21): cv2.circle(frame, (lm_xy[i][0], lm_xy[i][1]), radius, color[region.little_state], -1) else: for x,y in lm_xy: cv2.circle(frame, (x, y), 6, (0,128,255), -1) if self.show_handedness: cv2.putText(frame, f"RIGHT {region.handedness:.2f}" if region.handedness > 0.5 else f"LEFT {1-region.handedness:.2f}", (int(region.pd_box[0] * self.video_size+10), int((region.pd_box[1]+region.pd_box[3])*self.video_size+20)), cv2.FONT_HERSHEY_PLAIN, 2, (0,255,0) if region.handedness > 0.5 else (0,0,255), 2) if self.show_scores: cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}", (int(region.pd_box[0] * self.video_size+10), int((region.pd_box[1]+region.pd_box[3])*self.video_size+90)), cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2) if self.use_gesture and self.show_gesture: cv2.putText(frame, region.gesture, (int(region.pd_box[0]*self.video_size+10), int(region.pd_box[1]*self.video_size-50)), cv2.FONT_HERSHEY_PLAIN, 3, (255,255,255), 3) def run(self): device = dai.Device(self.create_pipeline()) device.startPipeline() # Define data queues if self.camera: q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False) q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False) if self.use_lm: q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False) q_lm_in = 
device.getInputQueue(name="lm_in") else: q_pd_in = device.getInputQueue(name="pd_in") q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True) if self.use_lm: q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True) q_lm_in = device.getInputQueue(name="lm_in") self.fps = FPS(mean_nb_frames=20) seq_num = 0 nb_pd_inferences = 0 nb_lm_inferences = 0 glob_pd_rtrip_time = 0 glob_lm_rtrip_time = 0 while True: self.fps.update() if self.camera: in_video = q_video.get() video_frame = in_video.getCvFrame() else: if self.image_mode: vid_frame = self.img else: ok, vid_frame = self.cap.read() if not ok: break h, w = vid_frame.shape[:2] dx = (w - self.video_size) // 2 dy = (h - self.video_size) // 2 video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size] frame_nn = dai.ImgFrame() frame_nn.setSequenceNum(seq_num) frame_nn.setWidth(self.pd_input_length) frame_nn.setHeight(self.pd_input_length) frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length))) q_pd_in.send(frame_nn) pd_rtrip_time = now() seq_num += 1 annotated_frame = video_frame.copy() # Get palm detection inference = q_pd_out.get() if not self.camera: glob_pd_rtrip_time += now() - pd_rtrip_time self.pd_postprocess(inference) self.pd_render(annotated_frame) nb_pd_inferences += 1 # Hand landmarks if self.use_lm: for i,r in enumerate(self.regions): img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length) nn_data = dai.NNData() nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length))) q_lm_in.send(nn_data) if i == 0: lm_rtrip_time = now() # We measure only for the first region # Retrieve hand landmarks for i,r in enumerate(self.regions): inference = q_lm_out.get() if i == 0: glob_lm_rtrip_time += now() - lm_rtrip_time self.lm_postprocess(r, inference) self.lm_render(annotated_frame, r) nb_lm_inferences += 1 self.fps.display(annotated_frame, orig=(50,50),color=(240,180,100)) cv2.imshow("video", annotated_frame) key = cv2.waitKey(1) if key == ord('q') or key == 27: break elif key == 32: # Pause on space bar cv2.waitKey(0) elif key == ord('1'): self.show_pd_box = not self.show_pd_box elif key == ord('2'): self.show_pd_kps = not self.show_pd_kps elif key == ord('3'): self.show_rot_rect = not self.show_rot_rect elif key == ord('4'): self.show_landmarks = not self.show_landmarks elif key == ord('5'): self.show_handedness = not self.show_handedness elif key == ord('6'): self.show_scores = not self.show_scores elif key == ord('7'): self.show_gesture = not self.show_gesture # Print some stats if not self.camera: print(f"# video files frames : {seq_num}") print(f"# palm detection inferences received : {nb_pd_inferences}") print(f"# hand landmark inferences received : {nb_lm_inferences}") print(f"Palm detection round trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms") print(f"Hand landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
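# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original file): to_planar() is used above
# to fill dai.ImgFrame / dai.NNData but is defined elsewhere in the project.
# DepthAI NN inputs expect planar (CxHxW) uint8 data, so an assumed version of
# that helper resizes the BGR frame and transposes it before flattening:
import cv2
import numpy as np


def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
    resized = cv2.resize(arr, shape)               # shape is (width, height)
    return resized.transpose(2, 0, 1).flatten()    # HxWx3 -> 3xHxW, flattened
# ---------------------------------------------------------------------------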
def run(self): device = dai.Device(self.create_pipeline()) device.startPipeline() # Define data queues if self.input_type == "internal": q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False) q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False) q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False) q_lm_in = device.getInputQueue(name="lm_in") else: q_pd_in = device.getInputQueue(name="pd_in") q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True) q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True) q_lm_in = device.getInputQueue(name="lm_in") self.fps = FPS(mean_nb_frames=20) seq_num = 0 nb_pd_inferences = 0 nb_lm_inferences = 0 glob_pd_rtrip_time = 0 glob_lm_rtrip_time = 0 while True: self.fps.update() if self.input_type == "internal": in_video = q_video.get() video_frame = in_video.getCvFrame() self.frame_size = video_frame.shape[ 0] # The image is square cropped on the device self.pad_w = self.pad_h = 0 else: if self.input_type == "image": vid_frame = self.img else: ok, vid_frame = self.cap.read() if not ok: break h, w = vid_frame.shape[:2] if self.crop: # Cropping the long side to get a square shape self.frame_size = min(h, w) dx = (w - self.frame_size) // 2 dy = (h - self.frame_size) // 2 video_frame = vid_frame[dy:dy + self.frame_size, dx:dx + self.frame_size] else: # Padding on the small side to get a square shape self.frame_size = max(h, w) self.pad_h = int((self.frame_size - h) / 2) self.pad_w = int((self.frame_size - w) / 2) video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h, self.pad_h, self.pad_w, self.pad_w, cv2.BORDER_CONSTANT) frame_nn = dai.ImgFrame() frame_nn.setSequenceNum(seq_num) frame_nn.setWidth(self.pd_input_length) frame_nn.setHeight(self.pd_input_length) frame_nn.setData( to_planar(video_frame, (self.pd_input_length, self.pd_input_length))) pd_rtrip_time = now() q_pd_in.send(frame_nn) seq_num += 1 annotated_frame = video_frame.copy() # Get pose detection inference = q_pd_out.get() if self.input_type != "internal": pd_rtrip_time = now() - pd_rtrip_time glob_pd_rtrip_time += pd_rtrip_time self.pd_postprocess(inference) self.pd_render(annotated_frame) nb_pd_inferences += 1 # Landmarks self.nb_active_regions = 0 if self.show_3d: self.vis3d.clear_geometries() self.vis3d.add_geometry(self.grid_floor, reset_bounding_box=False) self.vis3d.add_geometry(self.grid_wall, reset_bounding_box=False) for i, r in enumerate(self.regions): frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length) nn_data = dai.NNData() nn_data.setLayer( "input_1", to_planar(frame_nn, (self.lm_input_length, self.lm_input_length))) if i == 0: lm_rtrip_time = now( ) # We measure only for the first region q_lm_in.send(nn_data) # Get landmarks inference = q_lm_out.get() if i == 0: lm_rtrip_time = now() - lm_rtrip_time glob_lm_rtrip_time += lm_rtrip_time nb_lm_inferences += 1 self.lm_postprocess(r, inference) self.lm_render(annotated_frame, r) if self.show_3d: self.vis3d.poll_events() self.vis3d.update_renderer() if self.smoothing and self.nb_active_regions == 0: self.filter.reset() if self.input_type != "internal" and not self.crop: annotated_frame = annotated_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w] if self.show_fps: self.fps.display(annotated_frame, orig=(50, 50), size=1, color=(240, 180, 100)) cv2.imshow("Blazepose", annotated_frame) if self.output: self.output.write(annotated_frame) key = cv2.waitKey(1) if key == ord('q') or key == 27: 
break elif key == 32: # Pause on space bar cv2.waitKey(0) elif key == ord('1'): self.show_pd_box = not self.show_pd_box elif key == ord('2'): self.show_pd_kps = not self.show_pd_kps elif key == ord('3'): self.show_rot_rect = not self.show_rot_rect elif key == ord('4'): self.show_landmarks = not self.show_landmarks elif key == ord('5'): self.show_scores = not self.show_scores elif key == ord('6'): self.show_gesture = not self.show_gesture elif key == ord('f'): self.show_fps = not self.show_fps # Print some stats print(f"# pose detection inferences : {nb_pd_inferences}") print(f"# landmark inferences : {nb_lm_inferences}") if self.input_type != "internal" and nb_pd_inferences != 0: print( f"Pose detection round trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms" ) if nb_lm_inferences != 0: print( f"Landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms" ) if self.output: self.output.release()
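# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original file): both run() loops pad the
# input on its short side to get the square frame the detector expects, then
# strip that padding again from the rendered frame and from the landmark
# coordinates (landmarks_abs). Minimal version of that round trip:
import cv2
import numpy as np


def pad_to_square(img):
    h, w = img.shape[:2]
    size = max(h, w)
    pad_h, pad_w = (size - h) // 2, (size - w) // 2
    squared = cv2.copyMakeBorder(img, pad_h, pad_h, pad_w, pad_w,
                                 cv2.BORDER_CONSTANT)
    return squared, pad_h, pad_w


def unpad_points(points_xy, pad_h, pad_w):
    # Landmarks were computed in the padded square image; shift them back into
    # the original image's coordinate system.
    return np.asarray(points_xy) - np.array([pad_w, pad_h])
# ---------------------------------------------------------------------------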
from FPS import FPS

# The following imports are assumed from the usage below (project-local modules)
import cv2
import CVCapIN
from OpticalFlow import OpticalFlow

# drive_data = DataControl()
# drive_data.start()
# drive_data.set("tfl", "red")
# objd = OBJDetection(drive_data)
# objd.load()
# laned = LANEDetection(drive_data)
cap = CVCapIN.CVCapIN(id_c=2)
cap.start()
# drive = Drive()
optf = OpticalFlow()
fpser = FPS()
fpser.start()

while cv2.waitKey(1) != ord("q"):
    # drive_data.set("tfl", "red")
    _, frame = cap.read()
    # cv2.imshow("ddd", frame)
    # laned_img = laned.run(frame.copy())
    # cv2.imshow("laned", laned_img)
    # objd_img = objd.run(frame.copy())
    # cv2.imshow("objd", objd_img)
    # drive.run()
    # print(str(drive_data))
    optf_img = optf.run(frame)
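# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original file): the FPS helper imported
# above (FPS(mean_nb_frames=20), start(), update(), display()) lives in FPS.py.
# A minimal rolling-average counter consistent with that usage could look like
# this; RollingFPS is a hypothetical name, not the project's class.
import collections
import time

import cv2


class RollingFPS:
    def __init__(self, mean_nb_frames=20):
        self.timestamps = collections.deque(maxlen=mean_nb_frames)

    def update(self):
        self.timestamps.append(time.perf_counter())

    def get(self):
        if len(self.timestamps) < 2:
            return 0.0
        dt = self.timestamps[-1] - self.timestamps[0]
        return 0.0 if dt == 0 else (len(self.timestamps) - 1) / dt

    def display(self, frame, orig=(50, 50), size=1, color=(240, 180, 100)):
        cv2.putText(frame, f"FPS={self.get():.1f}", orig,
                    cv2.FONT_HERSHEY_PLAIN, 2 * size, color, 2)
# ---------------------------------------------------------------------------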
class BlazeposeDepthai: def __init__(self, input_src=None, pd_path=POSE_DETECTION_MODEL, pd_score_thresh=0.5, pd_nms_thresh=0.3, lm_path=FULL_BODY_LANDMARK_MODEL, lm_score_threshold=0.7, full_body=True, use_gesture=False, smoothing=True, filter_window_size=5, filter_velocity_scale=10, show_3d=False, crop=False, multi_detection=False, output=None, internal_fps=15): self.pd_path = pd_path self.pd_score_thresh = pd_score_thresh self.pd_nms_thresh = pd_nms_thresh self.lm_path = lm_path self.lm_score_threshold = lm_score_threshold self.full_body = full_body self.use_gesture = use_gesture self.smoothing = smoothing self.show_3d = show_3d self.crop = crop self.multi_detection = multi_detection if self.multi_detection: print("With multi-detection, smoothing filter is disabled.") self.smoothing = False self.internal_fps = internal_fps if input_src == None: self.input_type = "internal" # OAK* internal color camera self.video_fps = internal_fps # Used when saving the output in a video file. Should be close to the real fps video_height = video_width = 1080 # Depends on cam.setResolution() in create_pipeline() elif input_src.endswith('.jpg') or input_src.endswith('.png'): self.input_type = "image" self.img = cv2.imread(input_src) self.video_fps = 25 video_height, video_width = self.img.shape[:2] else: self.input_type = "video" if input_src.isdigit(): input_type = "webcam" input_src = int(input_src) self.cap = cv2.VideoCapture(input_src) self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS)) video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print("Video FPS:", self.video_fps) self.nb_kps = 33 if self.full_body else 25 if self.smoothing: self.filter = mpu.LandmarksSmoothingFilter(filter_window_size, filter_velocity_scale, (self.nb_kps, 3)) # Create SSD anchors # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt anchor_options = mpu.SSDAnchorOptions( num_layers=4, min_scale=0.1484375, max_scale=0.75, input_size_height=128, input_size_width=128, anchor_offset_x=0.5, anchor_offset_y=0.5, strides=[8, 16, 16, 16], aspect_ratios=[1.0], reduce_boxes_in_lowest_layer=False, interpolated_scale_aspect_ratio=1.0, fixed_anchor_size=True) self.anchors = mpu.generate_anchors(anchor_options) self.nb_anchors = self.anchors.shape[0] print(f"{self.nb_anchors} anchors have been created") # Rendering flags self.show_pd_box = False self.show_pd_kps = False self.show_rot_rect = False self.show_landmarks = True self.show_scores = False self.show_gesture = self.use_gesture self.show_fps = True if self.show_3d: self.vis3d = o3d.visualization.Visualizer() self.vis3d.create_window() opt = self.vis3d.get_render_option() opt.background_color = np.asarray([0, 0, 0]) z = min(video_height, video_width) / 3 self.grid_floor = create_grid([0, video_height, -z], [video_width, video_height, -z], [video_width, video_height, z], [0, video_height, z], 5, 2, color=(1, 1, 1)) self.grid_wall = create_grid([0, 0, z], [video_width, 0, z], [video_width, video_height, z], [0, video_height, z], 5, 2, color=(1, 1, 1)) self.vis3d.add_geometry(self.grid_floor) self.vis3d.add_geometry(self.grid_wall) view_control = self.vis3d.get_view_control() view_control.set_up(np.array([0, -1, 0])) view_control.set_front(np.array([0, 0, -1])) if output is None: self.output = None else: fourcc = cv2.VideoWriter_fourcc(*"MJPG") self.output = cv2.VideoWriter(output, fourcc, self.video_fps, (video_width, video_height)) def create_pipeline(self): 
print("Creating pipeline...") # Start defining a pipeline pipeline = dai.Pipeline() pipeline.setOpenVINOVersion( version=dai.OpenVINO.Version.VERSION_2021_2) self.pd_input_length = 128 if self.input_type == "internal": # ColorCamera print("Creating Color Camera...") cam = pipeline.createColorCamera() cam.setPreviewSize(self.pd_input_length, self.pd_input_length) cam.setResolution( dai.ColorCameraProperties.SensorResolution.THE_1080_P) # Crop video to square shape (palm detection takes square image as input) self.video_size = min(cam.getVideoSize()) cam.setVideoSize(self.video_size, self.video_size) # cam.setFps(self.internal_fps) cam.setInterleaved(False) cam.setBoardSocket(dai.CameraBoardSocket.RGB) cam_out = pipeline.createXLinkOut() cam_out.setStreamName("cam_out") # Link video output to host for higher resolution cam.video.link(cam_out.input) # Define pose detection model print("Creating Pose Detection Neural Network...") pd_nn = pipeline.createNeuralNetwork() pd_nn.setBlobPath(str(Path(self.pd_path).resolve().absolute())) # Increase threads for detection # pd_nn.setNumInferenceThreads(2) # Specify that network takes latest arriving frame in non-blocking manner # Pose detection input if self.input_type == "internal": pd_nn.input.setQueueSize(1) pd_nn.input.setBlocking(False) cam.preview.link(pd_nn.input) else: pd_in = pipeline.createXLinkIn() pd_in.setStreamName("pd_in") pd_in.out.link(pd_nn.input) # Pose detection output pd_out = pipeline.createXLinkOut() pd_out.setStreamName("pd_out") pd_nn.out.link(pd_out.input) # Define landmark model print("Creating Landmark Neural Network...") lm_nn = pipeline.createNeuralNetwork() lm_nn.setBlobPath(str(Path(self.lm_path).resolve().absolute())) lm_nn.setNumInferenceThreads(1) # Landmark input self.lm_input_length = 256 lm_in = pipeline.createXLinkIn() lm_in.setStreamName("lm_in") lm_in.out.link(lm_nn.input) # Landmark output lm_out = pipeline.createXLinkOut() lm_out.setStreamName("lm_out") lm_nn.out.link(lm_out.input) print("Pipeline created.") return pipeline def pd_postprocess(self, inference): scores = np.array(inference.getLayerFp16("classificators"), dtype=np.float16) # 896 bboxes = np.array(inference.getLayerFp16("regressors"), dtype=np.float16).reshape( (self.nb_anchors, 12)) # 896x12 # Decode bboxes self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes, self.anchors, best_only=not self.multi_detection) # Non maximum suppression (not needed if best_only is True) if self.multi_detection: self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh) mpu.detections_to_rect(self.regions, kp_pair=[0, 1] if self.full_body else [2, 3]) mpu.rect_transformation(self.regions, self.frame_size, self.frame_size) def pd_render(self, frame): for r in self.regions: if self.show_pd_box: box = (np.array(r.pd_box) * self.frame_size).astype(int) cv2.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 2) if self.show_pd_kps: # Key point 0 - mid hip center # Key point 1 - point that encodes size & rotation (for full body) # Key point 2 - mid shoulder center # Key point 3 - point that encodes size & rotation (for upper body) if self.full_body: # Only kp 0 and 1 used list_kps = [0, 1] else: # Only kp 2 and 3 used for upper body list_kps = [2, 3] for kp in list_kps: x = int(r.pd_kps[kp][0] * self.frame_size) y = int(r.pd_kps[kp][1] * self.frame_size) cv2.circle(frame, (x, y), 3, (0, 0, 255), -1) cv2.putText(frame, str(kp), (x, y + 12), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2) if self.show_scores: 
cv2.putText( frame, f"Pose score: {r.pd_score:.2f}", (int(r.pd_box[0] * self.frame_size + 10), int((r.pd_box[1] + r.pd_box[3]) * self.frame_size + 60)), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2) def lm_postprocess(self, region, inference): region.lm_score = inference.getLayerFp16("output_poseflag")[0] if region.lm_score > self.lm_score_threshold: self.nb_active_regions += 1 lm_raw = np.array(inference.getLayerFp16("ld_3d")).reshape(-1, 5) # Each keypoint have 5 information: # - X,Y coordinates are local to the region of # interest and range from [0.0, 255.0]. # - Z coordinate is measured in "image pixels" like # the X and Y coordinates and represents the # distance relative to the plane of the subject's # hips, which is the origin of the Z axis. Negative # values are between the hips and the camera; # positive values are behind the hips. Z coordinate # scale is similar with X, Y scales but has different # nature as obtained not via human annotation, by # fitting synthetic data (GHUM model) to the 2D # annotation. # - Visibility, after user-applied sigmoid denotes the # probability that a keypoint is located within the # frame and not occluded by another bigger body # part or another object. # - Presence, after user-applied sigmoid denotes the # probability that a keypoint is located within the # frame. # Normalize x,y,z. Scaling in z = scaling in x = 1/self.lm_input_length lm_raw[:, :3] /= self.lm_input_length # Apply sigmoid on visibility and presence (if used later) # lm_raw[:,3:5] = 1 / (1 + np.exp(-lm_raw[:,3:5])) # region.landmarks contains the landmarks normalized 3D coordinates in the relative oriented body bounding box region.landmarks = lm_raw[:, :3] # Calculate the landmark coordinate in square padded image (region.landmarks_padded) src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32) dst = np.array( [(x, y) for x, y in region.rect_points[1:]], dtype=np.float32 ) # region.rect_points[0] is left bottom point and points going clockwise! mat = cv2.getAffineTransform(src, dst) lm_xy = np.expand_dims(region.landmarks[:self.nb_kps, :2], axis=0) lm_xy = np.squeeze(cv2.transform(lm_xy, mat)) # A segment of length 1 in the coordinates system of body bounding box takes region.rect_w_a pixels in the # original image. Then we arbitrarily divide by 4 for a more realistic appearance. 
lm_z = region.landmarks[:self.nb_kps, 2:3] * region.rect_w_a / 4 lm_xyz = np.hstack((lm_xy, lm_z)) if self.smoothing: lm_xyz = self.filter.apply(lm_xyz) region.landmarks_padded = lm_xyz.astype(np.int) # If we added padding to make the image square, we need to remove this padding from landmark coordinates # region.landmarks_abs contains absolute landmark coordinates in the original image (padding removed)) region.landmarks_abs = region.landmarks_padded.copy() if self.pad_h > 0: region.landmarks_abs[:, 1] -= self.pad_h if self.pad_w > 0: region.landmarks_abs[:, 0] -= self.pad_w if self.use_gesture: self.recognize_gesture(region) def lm_render(self, frame, region): if region.lm_score > self.lm_score_threshold: if self.show_rot_rect: cv2.polylines(frame, [np.array(region.rect_points)], True, (0, 255, 255), 2, cv2.LINE_AA) if self.show_landmarks: list_connections = LINES_FULL_BODY if self.full_body else LINES_UPPER_BODY lines = [ np.array( [region.landmarks_padded[point, :2] for point in line]) for line in list_connections ] cv2.polylines(frame, lines, False, (255, 180, 90), 2, cv2.LINE_AA) for i, x_y in enumerate(region.landmarks_padded[:, :2]): if i > 10: color = (0, 255, 0) if i % 2 == 0 else (0, 0, 255) elif i == 0: color = (0, 255, 255) elif i in [4, 5, 6, 8, 10]: color = (0, 255, 0) else: color = (0, 0, 255) cv2.circle(frame, (x_y[0], x_y[1]), 4, color, -11) if self.show_3d: points = region.landmarks_abs lines = LINE_MESH_FULL_BODY if self.full_body else LINE_MESH_UPPER_BODY colors = COLORS_FULL_BODY for i, a_b in enumerate(lines): a, b = a_b line = create_segment(points[a], points[b], radius=5, color=colors[i]) if line: self.vis3d.add_geometry(line, reset_bounding_box=False) if self.show_scores: cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}", (int(region.pd_box[0] * self.frame_size + 10), int((region.pd_box[1] + region.pd_box[3]) * self.frame_size + 90)), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2) if self.use_gesture and self.show_gesture: cv2.putText(frame, region.gesture, (int(region.pd_box[0] * self.frame_size + 10), int(region.pd_box[1] * self.frame_size - 50)), cv2.FONT_HERSHEY_PLAIN, 5, (0, 1190, 255), 3) def recognize_gesture(self, r): def angle_with_y(v): # v: 2d vector (x,y) # Returns angle in degree ofv with y-axis of image plane if v[1] == 0: return 90 angle = atan2(v[0], v[1]) return np.degrees(angle) # For the demo, we want to recognize the flag semaphore alphabet # For this task, we just need to measure the angles of both arms with vertical right_arm_angle = angle_with_y(r.landmarks_abs[14, :2] - r.landmarks_abs[12, :2]) left_arm_angle = angle_with_y(r.landmarks_abs[13, :2] - r.landmarks_abs[11, :2]) right_pose = int((right_arm_angle + 202.5) / 45) left_pose = int((left_arm_angle + 202.5) / 45) r.gesture = semaphore_flag.get((right_pose, left_pose), None) def run(self): device = dai.Device(self.create_pipeline()) device.startPipeline() # Define data queues if self.input_type == "internal": q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False) q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False) q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False) q_lm_in = device.getInputQueue(name="lm_in") else: q_pd_in = device.getInputQueue(name="pd_in") q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True) q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True) q_lm_in = device.getInputQueue(name="lm_in") self.fps = FPS(mean_nb_frames=20) seq_num = 0 nb_pd_inferences = 0 
nb_lm_inferences = 0 glob_pd_rtrip_time = 0 glob_lm_rtrip_time = 0 while True: self.fps.update() if self.input_type == "internal": in_video = q_video.get() video_frame = in_video.getCvFrame() self.frame_size = video_frame.shape[ 0] # The image is square cropped on the device self.pad_w = self.pad_h = 0 else: if self.input_type == "image": vid_frame = self.img else: ok, vid_frame = self.cap.read() if not ok: break h, w = vid_frame.shape[:2] if self.crop: # Cropping the long side to get a square shape self.frame_size = min(h, w) dx = (w - self.frame_size) // 2 dy = (h - self.frame_size) // 2 video_frame = vid_frame[dy:dy + self.frame_size, dx:dx + self.frame_size] else: # Padding on the small side to get a square shape self.frame_size = max(h, w) self.pad_h = int((self.frame_size - h) / 2) self.pad_w = int((self.frame_size - w) / 2) video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h, self.pad_h, self.pad_w, self.pad_w, cv2.BORDER_CONSTANT) frame_nn = dai.ImgFrame() frame_nn.setSequenceNum(seq_num) frame_nn.setWidth(self.pd_input_length) frame_nn.setHeight(self.pd_input_length) frame_nn.setData( to_planar(video_frame, (self.pd_input_length, self.pd_input_length))) pd_rtrip_time = now() q_pd_in.send(frame_nn) seq_num += 1 annotated_frame = video_frame.copy() # Get pose detection inference = q_pd_out.get() if self.input_type != "internal": pd_rtrip_time = now() - pd_rtrip_time glob_pd_rtrip_time += pd_rtrip_time self.pd_postprocess(inference) self.pd_render(annotated_frame) nb_pd_inferences += 1 # Landmarks self.nb_active_regions = 0 if self.show_3d: self.vis3d.clear_geometries() self.vis3d.add_geometry(self.grid_floor, reset_bounding_box=False) self.vis3d.add_geometry(self.grid_wall, reset_bounding_box=False) for i, r in enumerate(self.regions): frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length) nn_data = dai.NNData() nn_data.setLayer( "input_1", to_planar(frame_nn, (self.lm_input_length, self.lm_input_length))) if i == 0: lm_rtrip_time = now( ) # We measure only for the first region q_lm_in.send(nn_data) # Get landmarks inference = q_lm_out.get() if i == 0: lm_rtrip_time = now() - lm_rtrip_time glob_lm_rtrip_time += lm_rtrip_time nb_lm_inferences += 1 self.lm_postprocess(r, inference) self.lm_render(annotated_frame, r) if self.show_3d: self.vis3d.poll_events() self.vis3d.update_renderer() if self.smoothing and self.nb_active_regions == 0: self.filter.reset() if self.input_type != "internal" and not self.crop: annotated_frame = annotated_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w] if self.show_fps: self.fps.display(annotated_frame, orig=(50, 50), size=1, color=(240, 180, 100)) cv2.imshow("Blazepose", annotated_frame) if self.output: self.output.write(annotated_frame) key = cv2.waitKey(1) if key == ord('q') or key == 27: break elif key == 32: # Pause on space bar cv2.waitKey(0) elif key == ord('1'): self.show_pd_box = not self.show_pd_box elif key == ord('2'): self.show_pd_kps = not self.show_pd_kps elif key == ord('3'): self.show_rot_rect = not self.show_rot_rect elif key == ord('4'): self.show_landmarks = not self.show_landmarks elif key == ord('5'): self.show_scores = not self.show_scores elif key == ord('6'): self.show_gesture = not self.show_gesture elif key == ord('f'): self.show_fps = not self.show_fps # Print some stats print(f"# pose detection inferences : {nb_pd_inferences}") print(f"# landmark inferences : {nb_lm_inferences}") if self.input_type != "internal" and nb_pd_inferences != 0: print( f"Pose detection round 
trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms" ) if nb_lm_inferences != 0: print( f"Landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms" ) if self.output: self.output.release()
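# Hedged illustration (not part of the original file): recognize_gesture() above
# quantizes each arm's angle with the image's vertical axis into 45-degree
# sectors before looking the (right_pose, left_pose) pair up in the flag
# semaphore table. The helpers below reproduce that math on plain (x, y)
# vectors so the mapping can be checked in isolation; semaphore_flag is assumed
# to be the same {(right_pose, left_pose): letter} dict used by the class.
from math import atan2, degrees

def arm_angle_with_vertical(v):
    # v = (x, y) vector from shoulder to elbow, in image coordinates (y grows downward)
    if v[1] == 0:
        return 90.0
    return degrees(atan2(v[0], v[1]))

def quantize_arm_angle(angle_deg):
    # The +202.5 offset centers the 45-degree sectors on the 8 semaphore arm positions
    return int((angle_deg + 202.5) / 45)

# Example: an arm hanging straight down gives angle 0 and falls in sector 4
# assert quantize_arm_angle(arm_angle_with_vertical((0, 1))) == 4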
class MovenetDepthai: """ Movenet body pose detector Arguments: - input_src: frame source, - "rgb" or None: OAK* internal color camera, - "rgb_laconic": same as "rgb" but without sending the frames to the host, - a file path of an image or a video, - an integer (eg 0) for a webcam id, - model: Movenet blob file, - "thunder": the default thunder blob file (see variable MOVENET_THUNDER_MODEL), - "lightning": the default lightning blob file (see variable MOVENET_LIGHTNING_MODEL), - a path of a blob file. It is important that the filename contains the string "thunder" or "lightning" to identify the tyoe of the model. - score_thresh : confidence score to determine whether a keypoint prediction is reliable (a float between 0 and 1). - crop : boolean which indicates if square cropping is done or not - internal_fps : when using the internal color camera as input source, set its FPS to this value (calling setFps()). - internal_frame_height : when using the internal color camera, set the frame height (calling setIspScale()). The width is calculated accordingly to height and depends on value of 'crop' - stats : True or False, when True, display the global FPS when exiting. """ def __init__( self, input_src="rgb", model=None, score_thresh=0.2, crop=False, internal_fps=None, internal_frame_height=640, stats=True, ): self.model = model if model == "lightning": self.model = str(MOVENET_LIGHTNING_MODEL) self.pd_input_length = 192 elif model == "thunder": self.model = str(MOVENET_THUNDER_MODEL) self.pd_input_length = 256 else: self.model = model if "lightning" in str(model): self.pd_input_length = 192 else: # Thunder self.pd_input_length = 256 print(f"Using blob file : {self.model}") print( f"MoveNet imput size : {self.pd_input_length}x{self.pd_input_length}x3" ) self.score_thresh = score_thresh self.crop = crop self.internal_fps = internal_fps self.stats = stats if input_src is None or input_src == "rgb" or input_src == "rgb_laconic": self.input_type = "rgb" # OAK* internal color camera self.laconic = ("laconic" in input_src ) # Camera frames are not sent to the host if internal_fps is None: if "thunder" in str(model): self.internal_fps = 12 else: self.internal_fps = 26 else: self.internal_fps = internal_fps print(f"Internal camera FPS set to: {self.internal_fps}") self.video_fps = internal_fps # Used when saving the output in a video file. 
Should be close to the real fps if self.crop: self.frame_size, self.scale_nd = find_isp_scale_params( internal_frame_height) self.img_h = self.img_w = self.frame_size else: width, self.scale_nd = find_isp_scale_params( internal_frame_height * 1920 / 1080, is_height=False) self.img_h = int( round(1080 * self.scale_nd[0] / self.scale_nd[1])) self.img_w = int( round(1920 * self.scale_nd[0] / self.scale_nd[1])) self.frame_size = self.img_w print(f"Internal camera image size: {self.img_w} x {self.img_h}") elif input_src.endswith(".jpg") or input_src.endswith(".png"): self.input_type = "image" self.img = cv2.imread(input_src) self.video_fps = 25 self.img_h, self.img_w = self.img.shape[:2] else: self.input_type = "video" if input_src.isdigit(): input_type = "webcam" input_src = int(input_src) self.cap = cv2.VideoCapture(input_src) self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS)) self.img_w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) self.img_h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print("Video FPS:", self.video_fps) # Defines the default crop region (pads the full image from both sides to make it a square image) # Used when the algorithm cannot reliably determine the crop region from the previous frame. box_size = max(self.img_w, self.img_h) x_min = (self.img_w - box_size) // 2 y_min = (self.img_h - box_size) // 2 self.init_crop_region = CropRegion(x_min, y_min, x_min + box_size, y_min + box_size, box_size) self.crop_region = self.init_crop_region self.device = dai.Device(self.create_pipeline()) # self.device.startPipeline() print("Pipeline started") # Define data queues if self.input_type == "rgb": if not self.laconic: self.q_video = self.device.getOutputQueue(name="cam_out", maxSize=1, blocking=False) self.q_manip_cfg = self.device.getInputQueue(name="manip_cfg") else: self.q_pd_in = self.device.getInputQueue(name="pd_in") self.q_pd_out = self.device.getOutputQueue(name="pd_out", maxSize=4, blocking=False) self.q_ac_in = self.device.getInputQueue(name="ac_in") self.q_ac_out = self.device.getOutputQueue(name="ac_out", maxSize=4, blocking=False) self.fps = FPS() self.nb_frames = 0 self.nb_pd_inferences = 0 def create_pipeline(self): print("Creating pipeline...") # Start defining a pipeline pipeline = dai.Pipeline() pipeline.setOpenVINOVersion(dai.OpenVINO.Version.VERSION_2021_3) if self.input_type == "rgb": # ColorCamera print("Creating Color Camera...") # cam = pipeline.create(dai.node.ColorCamera) cam = pipeline.createColorCamera() cam.setResolution( dai.ColorCameraProperties.SensorResolution.THE_1080_P) cam.setInterleaved(False) cam.setIspScale(self.scale_nd[0], self.scale_nd[1]) cam.setFps(self.internal_fps) cam.setBoardSocket(dai.CameraBoardSocket.RGB) cam.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB) if self.crop: cam.setVideoSize(self.frame_size, self.frame_size) cam.setPreviewSize(self.frame_size, self.frame_size) else: cam.setVideoSize(self.img_w, self.img_h) cam.setPreviewSize(self.img_w, self.img_h) if not self.laconic: cam_out = pipeline.createXLinkOut() # cam_out = pipeline.create(dai.node.XLinkOut) cam_out.setStreamName("cam_out") cam.video.link(cam_out.input) # ImageManip for cropping manip = pipeline.createImageManip() manip.setMaxOutputFrameSize(self.pd_input_length * self.pd_input_length * 3) manip.setWaitForConfigInput(True) manip.inputImage.setQueueSize(1) manip.inputImage.setBlocking(False) manip_cfg = pipeline.createXLinkIn() manip_cfg.setStreamName("manip_cfg") cam.preview.link(manip.inputImage) manip_cfg.out.link(manip.inputConfig) # Define pose 
detection model print("Creating Pose Detection Neural Network...") pd_nn = pipeline.createNeuralNetwork() # pd_nn = pipeline.create(dai.node.NeuralNetwork) pd_nn.setBlobPath(str(Path(self.model).resolve().absolute())) # pd_nn.input.setQueueSize(1) # Increase threads for detection # pd_nn.setNumInferenceThreads(2) if self.input_type == "rgb": manip.out.link(pd_nn.input) else: pd_in = pipeline.createXLinkIn() pd_in.setStreamName("pd_in") pd_in.out.link(pd_nn.input) # Define link to send pd result to host pd_out = pipeline.createXLinkOut() # pd_out = pipeline.create(dai.node.XLinkOut) pd_out.setStreamName("pd_out") pd_nn.out.link(pd_out.input) # Define action detection model print("Creating Action Detection Neural Network...") ac_nn = pipeline.createNeuralNetwork() ac_nn.setBlobPath( str( Path("models/action_openvino_2021.3_6shave.blob").resolve(). absolute())) ac_in = pipeline.createXLinkIn() ac_in.setStreamName("ac_in") ac_in.out.link(ac_nn.input) # Define link to send pd result to host ac_out = pipeline.createXLinkOut() ac_out.setStreamName("ac_out") ac_nn.out.link(ac_out.input) print("Pipeline created.") return pipeline def crop_and_resize(self, frame, crop_region): """Crops and resize the image to prepare for the model input.""" cropped = frame[ max(0, crop_region.ymin):min(self.img_h, crop_region.ymax), max(0, crop_region.xmin):min(self.img_w, crop_region.xmax), ] if (crop_region.xmin < 0 or crop_region.xmax >= self.img_w or crop_region.ymin < 0 or crop_region.ymax >= self.img_h): # Padding is necessary cropped = cv2.copyMakeBorder( cropped, max(0, -crop_region.ymin), max(0, crop_region.ymax - self.img_h), max(0, -crop_region.xmin), max(0, crop_region.xmax - self.img_w), cv2.BORDER_CONSTANT, ) cropped = cv2.resize( cropped, (self.pd_input_length, self.pd_input_length), interpolation=cv2.INTER_AREA, ) return cropped def torso_visible(self, scores): """Checks whether there are enough torso keypoints. This function checks whether the model is confident at predicting one of the shoulders/hips which is required to determine a good crop region. """ return ( scores[KEYPOINT_DICT["left_hip"]] > self.score_thresh or scores[KEYPOINT_DICT["right_hip"]] > self.score_thresh) and ( scores[KEYPOINT_DICT["left_shoulder"]] > self.score_thresh or scores[KEYPOINT_DICT["right_shoulder"]] > self.score_thresh) def determine_torso_and_body_range(self, keypoints, scores, center_x, center_y): """Calculates the maximum distance from each keypoints to the center location. The function returns the maximum distances from the two sets of keypoints: full 17 keypoints and 4 torso keypoints. The returned information will be used to determine the crop size. See determine_crop_region for more detail. 
""" torso_joints = [ "left_shoulder", "right_shoulder", "left_hip", "right_hip" ] max_torso_yrange = 0.0 max_torso_xrange = 0.0 for joint in torso_joints: dist_y = abs(center_y - keypoints[KEYPOINT_DICT[joint]][1]) dist_x = abs(center_x - keypoints[KEYPOINT_DICT[joint]][0]) if dist_y > max_torso_yrange: max_torso_yrange = dist_y if dist_x > max_torso_xrange: max_torso_xrange = dist_x max_body_yrange = 0.0 max_body_xrange = 0.0 for i in range(len(KEYPOINT_DICT)): if scores[i] < self.score_thresh: continue dist_y = abs(center_y - keypoints[i][1]) dist_x = abs(center_x - keypoints[i][0]) if dist_y > max_body_yrange: max_body_yrange = dist_y if dist_x > max_body_xrange: max_body_xrange = dist_x return [ max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange ] def determine_crop_region(self, body): """Determines the region to crop the image for the model to run inference on. The algorithm uses the detected joints from the previous frame to estimate the square region that encloses the full body of the target person and centers at the midpoint of two hip joints. The crop size is determined by the distances between each joints and the center point. When the model is not confident with the four torso joint predictions, the function returns a default crop which is the full image padded to square. """ if self.torso_visible(body.scores): center_x = (body.keypoints[KEYPOINT_DICT["left_hip"]][0] + body.keypoints[KEYPOINT_DICT["right_hip"]][0]) // 2 center_y = (body.keypoints[KEYPOINT_DICT["left_hip"]][1] + body.keypoints[KEYPOINT_DICT["right_hip"]][1]) // 2 ( max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange, ) = self.determine_torso_and_body_range(body.keypoints, body.scores, center_x, center_y) crop_length_half = np.amax([ max_torso_xrange * 1.9, max_torso_yrange * 1.9, max_body_yrange * 1.2, max_body_xrange * 1.2, ]) tmp = np.array([ center_x, self.img_w - center_x, center_y, self.img_h - center_y ]) crop_length_half = int( round(np.amin([crop_length_half, np.amax(tmp)]))) crop_corner = [ center_x - crop_length_half, center_y - crop_length_half ] if crop_length_half > max(self.img_w, self.img_h) / 2: return self.init_crop_region else: crop_length = crop_length_half * 2 return CropRegion( crop_corner[0], crop_corner[1], crop_corner[0] + crop_length, crop_corner[1] + crop_length, crop_length, ) else: return self.init_crop_region def pd_postprocess(self, inference): kps = np.array(inference.getLayerFp16("Identity")).reshape(-1, 3) # 17x3 body = Body( scores=kps[:, 2], keypoints_norm=kps[:, [1, 0]], score_thresh=self.score_thresh, ) body.keypoints = ( np.array([self.crop_region.xmin, self.crop_region.ymin]) + body.keypoints_norm * self.crop_region.size).astype(np.int) body.crop_region = self.crop_region body.next_crop_region = self.determine_crop_region(body) return body def next_frame(self): self.fps.update() if self.input_type == "rgb": # Send cropping information to manip node on device cfg = dai.ImageManipConfig() points = [ [self.crop_region.xmin, self.crop_region.ymin], [self.crop_region.xmax - 1, self.crop_region.ymin], [self.crop_region.xmax - 1, self.crop_region.ymax - 1], [self.crop_region.xmin, self.crop_region.ymax - 1], ] point2fList = [] for p in points: pt = dai.Point2f() pt.x, pt.y = p[0], p[1] point2fList.append(pt) cfg.setWarpTransformFourPoints(point2fList, False) cfg.setResize(self.pd_input_length, self.pd_input_length) cfg.setFrameType(dai.ImgFrame.Type.RGB888p) self.q_manip_cfg.send(cfg) # Get the device camera frame if wanted if self.laconic: frame 
= np.zeros((self.frame_size, self.frame_size, 3), dtype=np.uint8) else: in_video = self.q_video.get() frame = in_video.getCvFrame() else: if self.input_type == "image": frame = self.img.copy() else: ok, frame = self.cap.read() if not ok: return None, None # Cropping of the video frame cropped = self.crop_and_resize(frame, self.crop_region) cropped = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB).transpose(2, 0, 1) frame_nn = dai.ImgFrame() frame_nn.setTimestamp(time.monotonic()) frame_nn.setWidth(self.pd_input_length) frame_nn.setHeight(self.pd_input_length) frame_nn.setData(cropped) self.q_pd_in.send(frame_nn) # Get result from device inference = self.q_pd_out.get() body = self.pd_postprocess(inference) self.crop_region = body.next_crop_region # Statistics if self.stats: self.nb_frames += 1 self.nb_pd_inferences += 1 return frame, body def exit(self): # Print some stats if self.stats: print( f"FPS : {self.fps.global_duration():.1f} f/s (# frames = {self.fps.nb_frames()})" )
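# Hedged usage sketch (not from the original file): one way to drive the
# MovenetDepthai class defined above from the host side, using only the methods
# shown in this section (next_frame() and exit()). The drawing step is left as
# a placeholder because no renderer is defined here.
if __name__ == "__main__":
    pose = MovenetDepthai(input_src="rgb", model="thunder")
    while True:
        frame, body = pose.next_frame()
        if frame is None:
            break
        # ... draw body.keypoints / body.scores on frame here ...
        cv2.imshow("MoveNet", frame)
        if cv2.waitKey(1) in (27, ord('q')):
            break
    pose.exit()
    cv2.destroyAllWindows()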
def __init__(self, master): """ Start the GUI and the asynchronous threads. We are in the main (original) thread of the application, which will later be used by the GUI as well. """ self.master = master self.gps_destination_file_name = "" self.frames_destination_timestamp = "" self.video_destination_file_handler = cv2.VideoWriter() self.gps_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/GPS/" self.video_destination_dir = "/home/knorr-bremse/Projects/Digits4RailMaps/icom_track_gui/Videos/" self.record = False self.img = None self.frame = None self.frame1 = None self.f = 0 self.check = False self.brahim = 1.0 / 25 # p = Properties() # with open('config.properties', 'rb') as f: # p.load(f, 'utf-8') # print(p["video_id"].data) #### incremtentaiton properties file # p["video_id"]=str(int(p["video_id"].data) +1) # print(p["video_id"].data) # with open('config.properties', 'wb') as f: # p.store(f, encoding="utf-8") self.frame = None # Create the gps queue self.gps_queue = Queue() # principal thread self.running = 1 self.cameras_state = self.find_cam() print(self.cameras_state) if self.cameras_state[0]: self.camera = See3Cam(src=self.cameras_state[1], width=1280, height=720, framerate=30, name="CAM") self.camera.start() if self.cameras_state[2]: self.camera1 = See3Cam(src=self.cameras_state[3], width=1280, height=720, framerate=30, name="CAM1") self.camera1.start() self.fps = FPS() # Set up Gps self.gps = GpsCapture() # Set up the GUI part self.gui = GuiPart(master, self, self.cameras_state, self.gps.running, self.recordData, self.stopRecord) # Set up the thread for the GPS checking gps_controller = threading.Thread(target=self.checkGpsConnection, args=(2, )) gps_controller.setDaemon(True) gps_controller.start() # Set up the thread for the camera checking camera_controller = threading.Thread(target=self.checkCameraConnection, args=(2, )) camera_controller.setDaemon(True) camera_controller.start() # # Set up the thread for the video stream # camera_controller = threading.Thread(target=self.readCameraThread, args=(self.brahim,)) # camera_controller.setDaemon(True) # camera_controller.start() # Set up the thread for the GPS stream camera_controller = threading.Thread( target=self.readGpsThread, args=(self.brahim, )) ## refactoring camera_controller.setDaemon(True) camera_controller.start() # Set up the Thread for the data recording self.recordingThread = RecordingThreadController( self, self.video_destination_dir, self.gps_destination_dir, self.gui) self.video_output = True # Start the periodic call in the GUI . self.fps.start() self.periodicCall()
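# Hedged sketch: the bodies of checkGpsConnection()/checkCameraConnection() are
# not shown in this snippet. A typical shape for these daemon-thread targets is
# a poll loop that re-tests the connection every `interval` seconds while the
# controller is running; the GUI callback name below is an illustrative
# assumption, not part of the original class.
# def checkGpsConnection(self, interval):
#     while self.running:
#         self.gui.update_gps_status(self.gps.running)  # hypothetical GUI hook
#         time.sleep(interval)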
# construct the argument parser and parse the arguments ap = argparse.ArgumentParser() ap.add_argument("-n", "--num-frames", type=int, default=100, help="# of frames to loop over for FPS test") ap.add_argument("-d", "--display", type=int, default=1, help="Whether or not frames should be displayed") args = vars(ap.parse_args()) time.sleep(2.0) fps = FPS().start() Ar = aruco(98) Ar.start() st = time.time() num = 0 # loop over some frames...this time using the threaded stream while not Ar.stopped: time.sleep(.1) fps.stop() #print("[INFO] elapsed time: {:.2f}".format(fps.elapsed())) #print("[INFO] approx. FPS: {:.2f}".format(fps.fps())) print(100 / (time.time() - st)) # do a bit of cleanup cv2.destroyAllWindows()
from picamera import PiCamera from imutils.video import FPS import argparse import time import cv2 ap = argparse.ArgumentParser() ap.add_argument("-n", "--num-frames", type=int, default=100, help="Number of frames for FPS test") ap.add_argument("-d", "--display", type=int, default=3, help="Show frames or not") args = vars(ap.parse_args()) # init the camera! print("Sampling frames from threaded camera module...") vs = piStream().start() time.sleep(2.0) fps = FPS().start() while fps._numFrames < args["num_frames"]: # grab frame frame = vs.read() if args["display"] > 0: cv2.imshow("Frame", frame) key = cv2.waitKey(1) & 0xFF fps.update() fps.stop() print("Elapsed time: ", fps.elapsed()) print("FPS: ", fps.fps())
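# A minimal helper equivalent to the start()/update()/stop()/elapsed()/fps()
# calls used in the benchmark above (imutils.video.FPS, the usual source of
# this API, follows the same contract). This is a simplified sketch, not the
# imutils implementation.
import time

class SimpleFPS:
    def __init__(self):
        self._start = None
        self._end = None
        self._numFrames = 0

    def start(self):
        # mark the start of the measurement window
        self._start = time.time()
        return self

    def update(self):
        # call once per processed frame
        self._numFrames += 1

    def stop(self):
        # mark the end of the measurement window
        self._end = time.time()

    def elapsed(self):
        return self._end - self._start

    def fps(self):
        return self._numFrames / self.elapsed()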
def run(self): device = dai.Device(self.create_pipeline()) device.startPipeline() # Define data queues if self.camera: q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False) q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False) if self.use_lm: q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False) q_lm_in = device.getInputQueue(name="lm_in") else: q_pd_in = device.getInputQueue(name="pd_in") q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True) if self.use_lm: q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True) q_lm_in = device.getInputQueue(name="lm_in") self.fps = FPS(mean_nb_frames=20) seq_num = 0 nb_pd_inferences = 0 nb_lm_inferences = 0 glob_pd_rtrip_time = 0 glob_lm_rtrip_time = 0 while True: self.fps.update() if self.camera: in_video = q_video.get() video_frame = in_video.getCvFrame() else: if self.image_mode: vid_frame = self.img else: ok, vid_frame = self.cap.read() if not ok: break h, w = vid_frame.shape[:2] dx = (w - self.video_size) // 2 dy = (h - self.video_size) // 2 video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size] frame_nn = dai.ImgFrame() frame_nn.setSequenceNum(seq_num) frame_nn.setWidth(self.pd_input_length) frame_nn.setHeight(self.pd_input_length) frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length))) q_pd_in.send(frame_nn) pd_rtrip_time = now() seq_num += 1 annotated_frame = video_frame.copy() # Get palm detection inference = q_pd_out.get() if not self.camera: glob_pd_rtrip_time += now() - pd_rtrip_time self.pd_postprocess(inference) self.pd_render(annotated_frame) nb_pd_inferences += 1 # Hand landmarks if self.use_lm: for i,r in enumerate(self.regions): img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length) nn_data = dai.NNData() nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length))) q_lm_in.send(nn_data) if i == 0: lm_rtrip_time = now() # We measure only for the first region # Retrieve hand landmarks for i,r in enumerate(self.regions): inference = q_lm_out.get() if i == 0: glob_lm_rtrip_time += now() - lm_rtrip_time self.lm_postprocess(r, inference) self.lm_render(annotated_frame, r) nb_lm_inferences += 1 self.fps.display(annotated_frame, orig=(50,50),color=(240,180,100)) cv2.imshow("video", annotated_frame) key = cv2.waitKey(1) if key == ord('q') or key == 27: break elif key == 32: # Pause on space bar cv2.waitKey(0) elif key == ord('1'): self.show_pd_box = not self.show_pd_box elif key == ord('2'): self.show_pd_kps = not self.show_pd_kps elif key == ord('3'): self.show_rot_rect = not self.show_rot_rect elif key == ord('4'): self.show_landmarks = not self.show_landmarks elif key == ord('5'): self.show_handedness = not self.show_handedness elif key == ord('6'): self.show_scores = not self.show_scores elif key == ord('7'): self.show_gesture = not self.show_gesture # Print some stats if not self.camera: print(f"# video files frames : {seq_num}") print(f"# palm detection inferences received : {nb_pd_inferences}") print(f"# hand landmark inferences received : {nb_lm_inferences}") print(f"Palm detection round trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms") print(f"Hand landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
video.set(cv2.CAP_PROP_FRAME_WIDTH, w) video.set(cv2.CAP_PROP_FRAME_HEIGHT, h) ok, frame = video.read() h, w, _ = frame.shape if args.output: fourcc = cv2.VideoWriter_fourcc(*"MJPG") out = cv2.VideoWriter(args.output, fourcc, 30, (w, h)) my_op = OP(openpose_rendering=args.rendering, number_people_max=args.number_people_max, min_size=60, face_detection=args.face, frt=args.frt) fps = FPS() while True: # Read a new frame ok, frame = video.read() if not ok: break fps.update() frame = frame.copy() nb_persons, body_kps, face_kps = my_op.eval(frame) #print(2) my_op.draw_body(frame) if args.face: my_op.draw_face(frame) my_op.draw_eyes(frame) fps.display(frame)
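# Hedged continuation sketch: when args.output is set, the cv2.VideoWriter
# created above still needs frames written to it and a final release; the
# visible part of the loop stops at fps.display(frame). A typical tail of such
# a loop (names match the snippet above) would be:
#     if args.output:
#         out.write(frame)
#     cv2.imshow("OpenPose", frame)
#     if cv2.waitKey(1) in (27, ord('q')):
#         break
# video.release()
# if args.output:
#     out.release()
# cv2.destroyAllWindows()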
def __init__(self, use_face_tracking=True, kbd_layout="QWERTY", write_log_data=False, media_directory="media", child_cnx=None, log_level=None): self.log_level = log_level self.debug = log_level is not None self.child_cnx = child_cnx self.use_multiprocessing = child_cnx is not None self.kbd_layout = kbd_layout # Flight data self.is_flying = False self.battery = None self.fly_mode = None self.throw_fly_timer = 0 self.tracking_after_takeoff = False self.record = False self.keydown = False self.date_fmt = '%Y-%m-%d_%H%M%S' self.drone = tellopy.Tello( start_recv_thread=not self.use_multiprocessing) self.axis_command = { "yaw": self.drone.clockwise, "roll": self.drone.right, "pitch": self.drone.forward, "throttle": self.drone.up } self.axis_speed = {"yaw": 0, "roll": 0, "pitch": 0, "throttle": 0} self.cmd_axis_speed = {"yaw": 0, "roll": 0, "pitch": 0, "throttle": 0} self.prev_axis_speed = self.axis_speed.copy() self.def_speed = {"yaw": 50, "roll": 35, "pitch": 35, "throttle": 80} self.write_log_data = write_log_data self.reset() self.media_directory = media_directory if not os.path.isdir(self.media_directory): os.makedirs(self.media_directory) if self.write_log_data: path = 'tello-%s.csv' % datetime.datetime.now().strftime( '%Y-%m-%d_%H%M%S') self.log_file = open(path, 'w') self.write_header = True self.init_drone() if not self.use_multiprocessing: self.init_sounds() self.init_controls() # container for processing the packets into frames self.container = av.open(self.drone.get_video_stream()) self.vid_stream = self.container.streams.video[0] self.out_file = None self.out_stream = None self.out_name = None self.start_time = time.time() # Setup Openpose if not self.use_multiprocessing: self.op = OP(number_people_max=1, min_size=25, debug=self.debug) self.use_openpose = False self.morse = CameraMorse(display=False) self.morse.define_command("---", self.delayed_takeoff) self.morse.define_command("...", self.throw_and_go, {'tracking': True}) self.is_pressed = False self.fps = FPS() self.exposure = 0 if self.debug: self.graph_pid = RollingGraph(window_name="PID", step_width=2, width=2000, height=500, y_max=200, colors=[(255, 255, 255), (255, 200, 0), (0, 0, 255), (0, 255, 0)], thickness=[2, 2, 2, 2], threshold=100, waitKey=False) # Logging self.log_level = log_level if log_level is not None: if log_level == "info": log_level = logging.INFO elif log_level == "debug": log_level = logging.DEBUG log.setLevel(log_level) ch = logging.StreamHandler(sys.stdout) ch.setLevel(log_level) ch.setFormatter( logging.Formatter( fmt= '%(asctime)s.%(msecs)03d - %(name)s - %(levelname)s - %(message)s', datefmt="%H:%M:%S")) log.addHandler(ch)
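# Hedged sketch: axis_command above pairs each logical axis with a tellopy rate
# command (clockwise/right/forward/up). A control loop typically pushes
# self.axis_speed through that mapping, and prev_axis_speed lets it skip axes
# whose value has not changed. The method name send_axis_speeds is an
# illustrative assumption, not part of the original class.
# def send_axis_speeds(self):
#     for axis, speed in self.axis_speed.items():
#         if speed is not None and speed != self.prev_axis_speed[axis]:
#             self.axis_command[axis](speed)   # e.g. self.drone.clockwise(50)
#             self.prev_axis_speed[axis] = speed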
def main_core(detector, embedder, recognizer, le, frame_queue, pframe_queue): print('[INFO] Starting:', mp.current_process().name) # time.sleep(1.0) # initialize the video stream, then allow the camera sensor to warm up ## Set up threading to start filming video_getter = VideoGet(frame_queue=frame_queue, src=0, name='Video Getter') time.sleep(1.0) # print('[INFO] Starting VideoGet...') video_getter.start() time.sleep(1.0) cpt = 0 exitbool = False fps_count = FPS(2).start() while True: frame = video_getter.frame.copy() fps_count.istime() # Check whether enough time has passed to process this frame if fps_count.boolFrames: fps_count.updateactualtime() # Update the comparison time face_data = acquire_frame(detector, embedder, frame, recognizer, le, 0.5, 0.65) #,fa) pframe_queue.put(face_data) for item in face_data: # print(item[2:]) frame = draw_frame(frame, item) fps_count.update() # cpt +=1 exitbool = show_frame(frame) if exitbool or cpt == 80: # fps_count.stop() print("[INFO] elapsed time fps processed: {:.2f}".format( fps_count.elapsed())) print("[INFO] approx. processed FPS: {:.2f}".format( fps_count.fps())) time.sleep(1) video_getter.stop() time.sleep(2) print('[INFO] Exiting:', mp.current_process().name) break
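# The FPS(2).istime()/boolFrames pattern above rate-limits the expensive
# face-recognition step to roughly 2 processed frames per second. Below is a
# stand-alone version of that throttling idea using only the standard library;
# it is an illustration, not the custom FPS class used in main_core().
import time

class FrameThrottle:
    def __init__(self, target_fps):
        self.interval = 1.0 / target_fps
        self.last = 0.0

    def ready(self):
        # True when at least `interval` seconds have passed since the last accepted frame
        now = time.monotonic()
        if now - self.last >= self.interval:
            self.last = now
            return True
        return False

# usage: throttle = FrameThrottle(2); run the recognition step only when throttle.ready() is True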
print("[INFO] sampling THREADED frames from `picamera` module...") vs = PiVideoStream().start() #if args["display"] > 0: # disp = Display().start() time.sleep(2.0) prv = vs.read() prv = cv2.cvtColor(prv,cv2.COLOR_BGR2GRAY) prv = cv2.GaussianBlur(prv,(17,17),0) center_y, center_x = (item//2 for item in prv.shape) fps = FPS().start() # loop over some frames...this time using the threaded stream while fps._numFrames < args["num_frames"]: # grab the frame from the threaded video stream and resize it # to have a maximum width of 400 pixels frame = vs.read() #frame = imutils.resize(frame, width=400) # check to see if the frame should be displayed to our screen img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) result = preprocess(img, prv) if args["display"] > 0: #disp.preview(result)
def __init__( self, input_src="rgb", model=None, score_thresh=0.2, crop=False, internal_fps=None, internal_frame_height=640, stats=True, ): self.model = model if model == "lightning": self.model = str(MOVENET_LIGHTNING_MODEL) self.pd_input_length = 192 elif model == "thunder": self.model = str(MOVENET_THUNDER_MODEL) self.pd_input_length = 256 else: self.model = model if "lightning" in str(model): self.pd_input_length = 192 else: # Thunder self.pd_input_length = 256 print(f"Using blob file : {self.model}") print( f"MoveNet imput size : {self.pd_input_length}x{self.pd_input_length}x3" ) self.score_thresh = score_thresh self.crop = crop self.internal_fps = internal_fps self.stats = stats if input_src is None or input_src == "rgb" or input_src == "rgb_laconic": self.input_type = "rgb" # OAK* internal color camera self.laconic = ("laconic" in input_src ) # Camera frames are not sent to the host if internal_fps is None: if "thunder" in str(model): self.internal_fps = 12 else: self.internal_fps = 26 else: self.internal_fps = internal_fps print(f"Internal camera FPS set to: {self.internal_fps}") self.video_fps = internal_fps # Used when saving the output in a video file. Should be close to the real fps if self.crop: self.frame_size, self.scale_nd = find_isp_scale_params( internal_frame_height) self.img_h = self.img_w = self.frame_size else: width, self.scale_nd = find_isp_scale_params( internal_frame_height * 1920 / 1080, is_height=False) self.img_h = int( round(1080 * self.scale_nd[0] / self.scale_nd[1])) self.img_w = int( round(1920 * self.scale_nd[0] / self.scale_nd[1])) self.frame_size = self.img_w print(f"Internal camera image size: {self.img_w} x {self.img_h}") elif input_src.endswith(".jpg") or input_src.endswith(".png"): self.input_type = "image" self.img = cv2.imread(input_src) self.video_fps = 25 self.img_h, self.img_w = self.img.shape[:2] else: self.input_type = "video" if input_src.isdigit(): input_type = "webcam" input_src = int(input_src) self.cap = cv2.VideoCapture(input_src) self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS)) self.img_w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) self.img_h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print("Video FPS:", self.video_fps) # Defines the default crop region (pads the full image from both sides to make it a square image) # Used when the algorithm cannot reliably determine the crop region from the previous frame. box_size = max(self.img_w, self.img_h) x_min = (self.img_w - box_size) // 2 y_min = (self.img_h - box_size) // 2 self.init_crop_region = CropRegion(x_min, y_min, x_min + box_size, y_min + box_size, box_size) self.crop_region = self.init_crop_region self.device = dai.Device(self.create_pipeline()) # self.device.startPipeline() print("Pipeline started") # Define data queues if self.input_type == "rgb": if not self.laconic: self.q_video = self.device.getOutputQueue(name="cam_out", maxSize=1, blocking=False) self.q_manip_cfg = self.device.getInputQueue(name="manip_cfg") else: self.q_pd_in = self.device.getInputQueue(name="pd_in") self.q_pd_out = self.device.getOutputQueue(name="pd_out", maxSize=4, blocking=False) self.q_ac_in = self.device.getInputQueue(name="ac_in") self.q_ac_out = self.device.getOutputQueue(name="ac_out", maxSize=4, blocking=False) self.fps = FPS() self.nb_frames = 0 self.nb_pd_inferences = 0