def setup(args): global input_path, output_path, device, cpu_extension, prob_threshold, flags, mouse_controller, feeder, video_writer, model_dict, model_loading_total_time model_args = [ args.face_detection_model, args.facial_landmarks_detection_model, args.head_pose_estimation_model, args.gaze_estimation_model, ] model_class = [ Model_FaceDetection, Model_FacialLandMarkDetection, Model_HeadPoseEstimation, Model_GazeEstimation, ] input_path = input_path_generator(args.input) if args.input != "CAM" else None output_path = output_path_generator(args.output) device = args.device cpu_extension = args.cpu_extension prob_threshold = args.prob_threshold flags = args.flags if not os.path.exists(output_path): os.mkdir(output_path) mouse_controller = MouseController("low", "fast") if input_path: if input_path.endswith(".jpg"): feeder = InputFeeder("image", input_path) else: feeder = InputFeeder("video", input_path) else: feeder = InputFeeder("cam") feeder.load_data() fps = feeder.fps() initial_w, initial_h, video_len = feeder.frame_initials_and_length() video_writer = cv2.VideoWriter( os.path.join(output_path, "output_video.mp4"), cv2.VideoWriter_fourcc(*"avc1"), fps / 10, (initial_w, initial_h), True, ) model_dict, model_loading_total_time = generate_model_dict(model_args, model_class) return
def infer_on_stream(args): face_detection_model_file = args.faceDetectionModel facial_landmarks_detection_model_file = args.facialLandmarksModel head_pose_estimation_model_file = args.headPoseModel gaze_estimation_model_file = args.gazeModel video_file = args.input device_name = args.device cpu_extension = args.cpu_extension prob_threshold = args.prob_threshold preview_flag = args.preview_flag output_path = args.output_path if not os.path.exists(output_path): os.mkdir(output_path) mouse_control = MouseController("low", "fast") try: logging.info("*********** Model Load Time ***************") start_model_load_time = time.time() start_time = time.time() face_detection_model = FaceDetectionModel(face_detection_model_file, device_name, cpu_extension) logging.info("Face Detection Model: {:.1f} ms.".format( 1000 * (time.time() - start_time))) start_time = time.time() facial_landmarks_detection_model = FacialLandmarksDetectionModel( facial_landmarks_detection_model_file, device_name, cpu_extension) logging.info("Facial Landmarks Detection Model: {:.1f} ms.".format( 1000 * (time.time() - start_time))) start_time = time.time() head_pose_estimation_model = HeadPoseEstimationModel( head_pose_estimation_model_file, device_name, cpu_extension) logging.info("Head Pose Estimation Model: {:.1f} ms.".format( 1000 * (time.time() - start_time))) start_time = time.time() gaze_estimation_model = GazeEstimationModel(gaze_estimation_model_file, device_name, cpu_extension) logging.info("Gaze Estimation Model: {:.1f} ms.".format( 1000 * (time.time() - start_time))) total_model_load_time = time.time() - start_model_load_time logging.info("*********** Model Load Completed ***********") except Exception as e: logging.error("ERROR in model loading: " + str(e)) sys.exit(1) feeder = InputFeeder('video', video_file) feeder.load_data() out_video = cv2.VideoWriter(os.path.join(output_path, 'output_video.mp4'), cv2.VideoWriter_fourcc(*'avc1'), int(feeder.fps() / 10), (1920, 1080), True) start_inference_time = 0 frame_count = 0 face_detect_infer_time = 0 facial_landmarks_infer_time = 0 head_pose_infer_time = 0 gaze_infer_time = 0 while True: try: frame = next(feeder.next_batch()) except StopIteration: break key_pressed = cv2.waitKey(60) frame_count += 1 ## Face Detecton Model image = face_detection_model.preprocess_input(frame) start_time = time.time() outputs = face_detection_model.predict(image) face_detect_infer_time += (time.time() - start_time) out_frame, faces = face_detection_model.preprocess_output( outputs, frame, preview_flag, prob_threshold) for face in faces: crop_image = frame[face[1]:face[3], face[0]:face[2]] ## Facial Landmarks Detecton Model image = facial_landmarks_detection_model.preprocess_input( crop_image) start_time = time.time() outputs = facial_landmarks_detection_model.predict(image) facial_landmarks_infer_time += (time.time() - start_time) out_frame, left_eye_point, right_eye_point = facial_landmarks_detection_model.preprocess_output( outputs, out_frame, face, preview_flag) ## Head Pose Estimation Model image = head_pose_estimation_model.preprocess_input(crop_image) start_time = time.time() outputs = head_pose_estimation_model.predict(image) head_pose_infer_time += (time.time() - start_time) out_frame, headpose_angels_list = head_pose_estimation_model.preprocess_output( outputs, out_frame, preview_flag) ## Gaze Estimation Model out_frame, left_eye, right_eye = gaze_estimation_model.preprocess_input( out_frame, crop_image, left_eye_point, right_eye_point) start_time = time.time() outputs = gaze_estimation_model.predict(left_eye, right_eye, headpose_angels_list) gaze_infer_time += (time.time() - start_time) out_frame, gazevector = gaze_estimation_model.preprocess_output( outputs, out_frame, face, left_eye_point, right_eye_point, preview_flag) cv2.imshow("Computer Pointer Control", out_frame) out_video.write(out_frame) mouse_control.move(gazevector[0], gazevector[1]) if key_pressed == 27: break if frame_count > 0: logging.info("*********** Model Inference Time ****************") logging.info("Face Detection Model: {:.1f} ms.".format( 1000 * face_detect_infer_time / frame_count)) logging.info("Facial Landmarks Detection Model: {:.1f} ms.".format( 1000 * facial_landmarks_infer_time / frame_count)) logging.info("Head Pose Detection Model: {:.1f} ms.".format( 1000 * head_pose_infer_time / frame_count)) logging.info("Gaze Detection Model: {:.1f} ms.".format( 1000 * gaze_infer_time / frame_count)) logging.info("*********** Model Inference Completed ***********") total_infer_time = time.time() - start_inference_time total_inference_time = round(total_infer_time, 1) fps = frame_count / total_inference_time with open(os.path.join(output_path, 'stats.txt'), 'w') as f: f.write(str(total_inference_time) + '\n') f.write(str(fps) + '\n') f.write(str(total_model_load_time) + '\n') logging.info("*********** Total Summary ****************") logging.info(f"Total Model Load Time: {total_model_load_time}") logging.info(f"Total Inference Time: {total_inference_time}") logging.info(f"FPS: {fps}") logging.info("*********** Total Summary ***********") logging.info("*********** ************************* ***********") feeder.close() cv2.destroyAllWindows()