def infer(args):
    """
    Initialize the inference network, run inference, and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :return: None
    """
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    if args.input.lower() == "cam":
        input_feeder = InputFeeder("cam")
    else:
        if not os.path.isfile(args.input):
            logger.error("Unable to find input file")
            exit(1)
        input_feeder = InputFeeder("video", args.input)

    start_time = time.time()
    model_fd = Model_Face_Detection(args.facedetectionmodel, args.device, args.cpu_extension)
    model_fld = Model_Facial_Landmarks_Detection(args.faciallandmarkmodel, args.device, args.cpu_extension)
    model_ge = Model_Gaze_Estimation(args.gazeestimationmodel, args.device, args.cpu_extension)
    model_hp = Model_Head_Pose_Estimation(args.headposemodel, args.device, args.cpu_extension)

    mc = MouseController('medium', 'fast')

    input_feeder.load_data()
    model_fd.load_model()
    model_fld.load_model()
    model_ge.load_model()
    model_hp.load_model()
    loading_time = time.time() - start_time
    logger.info("Loading time of the models: {} s".format(loading_time))

    frame_count = 0
    inference_time = 0
    for flag, frame in input_feeder.next_batch():
        if not flag:
            break
        if frame is None:
            continue

        key = cv2.waitKey(60)
        if key == 27:  # Esc
            break
        frame_count += 1

        start_inference = time.time()
        face, face_coords = model_fd.predict(frame, prob_threshold)
        if isinstance(face, int):
            logger.error("No face detected.")
            continue

        out_hp = model_hp.predict(face)
        left_eye, right_eye, eye_coords = model_fld.predict(face)
        mouse_coord, gaze_vector = model_ge.predict(left_eye, right_eye, out_hp)
        inference_time += time.time() - start_inference

        if len(args.flags) != 0:
            frame_p = frame.copy()
            if 'fd' in args.flags:
                frame_p = face
            if 'fld' in args.flags:
                # Draw boxes around both detected eyes
                cv2.rectangle(face, (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10), (0, 255, 0), 3)
                cv2.rectangle(face, (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10), (0, 255, 0), 3)
            if 'hp' in args.flags:
                cv2.putText(frame_p,
                            "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".format(
                                out_hp[0], out_hp[1], out_hp[2]),
                            (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.2, (255, 255, 255), 1)
            if 'ge' in args.flags:
                # Draw a cross on each eye crop indicating the gaze direction
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] * 12), 160
                le = cv2.line(left_eye, (x - w, y - w), (x + w, y + w), (255, 0, 255), 2)
                cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                re = cv2.line(right_eye, (x - w, y - w), (x + w, y + w), (255, 0, 255), 2)
                cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                face[eye_coords[0][1]:eye_coords[0][3], eye_coords[0][0]:eye_coords[0][2]] = le
                face[eye_coords[1][1]:eye_coords[1][3], eye_coords[1][0]:eye_coords[1][2]] = re
            cv2.imshow("visualization", cv2.resize(frame_p, (500, 500)))

        # Move the mouse on every fifth frame
        if frame_count % 5 == 0:
            mc.move(mouse_coord[0], mouse_coord[1])

    logger.info("Total inference time {} s".format(inference_time))
    logger.info("Average inference time {} s".format(inference_time / frame_count))
    logger.info("FPS {} frames/second".format(frame_count / inference_time))

    cv2.destroyAllWindows()
    input_feeder.close()
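# The snippets in this section all rely on an `InputFeeder` helper that is not
# shown here. Below is a minimal sketch of the interface they assume
# (`load_data`, `next_batch`, `close`); the project's actual class may differ,
# and some snippets iterate `(ret, frame)` pairs while others iterate bare frames.
import cv2


class InputFeeder:
    """Minimal sketch of the input feed assumed by the snippets in this section."""

    def __init__(self, input_type, input_file=None):
        self.input_type = input_type
        self.input_file = input_file
        self.cap = None

    def load_data(self):
        # Index 0 selects the default webcam; otherwise open the given video file.
        source = 0 if self.input_type == "cam" else self.input_file
        self.cap = cv2.VideoCapture(source)

    def next_batch(self):
        # Yield (ret, frame) pairs until the stream is exhausted.
        while self.cap.isOpened():
            ret, frame = self.cap.read()
            if not ret:
                break
            yield ret, frame

    def close(self):
        if self.cap is not None:
            self.cap.release()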
def main():
    args = build_argparser().parse_args()
    previewFlags = args.previewFlags
    logger = logging.getLogger()
    inputFile = args.input

    if inputFile.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(inputFile):
            logger.error("Unable to find input file")
            exit(1)
        inputFeeder = InputFeeder("video", inputFile)

    start_loading = time.time()
    mfd = Model_Face_Detection(args.facedetectionmodel, args.device, args.cpu_extension)
    mfld = Model_Facial_Landmarks_Detection(args.faciallandmarkmodel, args.device, args.cpu_extension)
    mge = Model_Gaze_Estimation(args.gazeestimationmodel, args.device, args.cpu_extension)
    mhpe = Model_Head_Pose_Estimation(args.headposemodel, args.device, args.cpu_extension)

    mc = MouseController('medium', 'fast')

    inputFeeder.load_data()
    mfd.load_model()
    mfld.load_model()
    mge.load_model()
    mhpe.load_model()
    model_loading_time = time.time() - start_loading

    frame_count = 0
    inference_time = 0
    for ret, frame in inputFeeder.next_batch():
        if not ret:
            break
        if frame is None:
            continue
        frame_count += 1
        if frame_count % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))
        key = cv2.waitKey(60)

        start_inference = time.time()
        croppedFace, face_coords = mfd.predict(frame.copy(), args.prob_threshold)
        if isinstance(croppedFace, int):
            logger.error("No face detected.")
            if key == 27:
                break
            continue

        hp_out = mhpe.predict(croppedFace.copy())
        left_eye, right_eye, eye_coords = mfld.predict(croppedFace.copy())
        new_mouse_coord, gaze_vector = mge.predict(left_eye, right_eye, hp_out)
        inference_time += time.time() - start_inference

        if len(previewFlags) != 0:
            preview_window = frame.copy()
            if 'fd' in previewFlags:
                if len(previewFlags) != 1:
                    preview_window = croppedFace
                else:
                    cv2.rectangle(preview_window, (face_coords[0], face_coords[1]),
                                  (face_coords[2], face_coords[3]), (0, 150, 0), 3)
            if 'fld' in previewFlags:
                if 'fd' not in previewFlags:
                    preview_window = croppedFace.copy()
                cv2.rectangle(preview_window,
                              (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10), (0, 255, 0), 3)
                cv2.rectangle(preview_window,
                              (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10), (0, 255, 0), 3)
            if 'hp' in previewFlags:
                cv2.putText(preview_window,
                            "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".format(
                                hp_out[0], hp_out[1], hp_out[2]),
                            (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
            if 'ge' in previewFlags:
                if 'fd' not in previewFlags:
                    preview_window = croppedFace.copy()
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] * 12), 160
                le = cv2.line(left_eye.copy(), (x - w, y - w), (x + w, y + w), (255, 0, 255), 2)
                cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                re = cv2.line(right_eye.copy(), (x - w, y - w), (x + w, y + w), (255, 0, 255), 2)
                cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                preview_window[eye_coords[0][1]:eye_coords[0][3],
                               eye_coords[0][0]:eye_coords[0][2]] = le
                preview_window[eye_coords[1][1]:eye_coords[1][3],
                               eye_coords[1][0]:eye_coords[1][2]] = re

        if len(previewFlags) != 0:
            img_hor = np.hstack((cv2.resize(frame, (500, 500)),
                                 cv2.resize(preview_window, (500, 500))))
        else:
            img_hor = cv2.resize(frame, (500, 500))
        cv2.imshow('Visualization', img_hor)

        if frame_count % 5 == 0:
            mc.move(new_mouse_coord[0], new_mouse_coord[1])
        if key == 27:
            break

    fps = frame_count / inference_time
    logger.info("video ended...")
    logger.info("Total loading time of the models: {} s".format(model_loading_time))
    logger.info("Total inference time: {} s".format(inference_time))
    logger.info("Average inference time: {} s".format(inference_time / frame_count))
    logger.info("FPS: {} frames/second".format(fps))

    cv2.destroyAllWindows()
    inputFeeder.close()
def main():
    # Get arguments
    args = get_args()
    visualization_list = args.visualize
    prob = args.prob
    if prob is None:
        prob = 0.5
    input_type = args.input_file
    input_path = args.input_path

    # Logging config
    logging.basicConfig(filename="app.log",
                        level=logging.DEBUG,
                        format='%(asctime)s:%(levelname)s:%(message)s')

    # Initialize models
    try:
        fd = Model_Face_Detection(args.face_detection, args.device, args.extention)
        ld = Model_Facial_Landmark_Detection(args.landmark_detection, args.device, args.extention)
        hp = Model_Head_Pose_Estimation(args.head_pose_detection, args.device, args.extention)
        gd = Model_Gaze_Estimation(args.gaze_detection, args.device, args.extention)
    except Exception:
        logging.error("Error in initializing models")
        exit(1)

    # Load models, timing each load separately
    try:
        start_loading_time_fd = time.time()
        fd.load_model()
        fd_time_diff = time.time() - start_loading_time_fd

        start_loading_time_ld = time.time()
        ld.load_model()
        ld_time_diff = time.time() - start_loading_time_ld

        start_loading_time_hp = time.time()
        hp.load_model()
        hp_time_diff = time.time() - start_loading_time_hp

        start_loading_time_gd = time.time()
        gd.load_model()
        gd_time_diff = time.time() - start_loading_time_gd
    except Exception:
        logging.error("Error in loading the models")
        exit(1)

    logging.debug(
        "Loading times are face detection: {}, landmark detection: {}, "
        "head pose estimation: {}, gaze estimation: {}".format(
            fd_time_diff, ld_time_diff, hp_time_diff, gd_time_diff))

    if input_type.lower() != "cam":
        if not os.path.isfile(input_path):
            logging.error("Unable to find specified video file")
            exit(1)
    else:
        input_path = None

    # Initialize input feed and load data
    input_feed = InputFeeder(input_type, input_path)
    input_feed.load_data()

    # Control the mouse pointer (created once, outside the frame loop)
    mc = MouseController("high", "fast")

    avg_inf_time = {"fd": [], "ld": [], "hp": [], "gd": []}
    for ret, frame in input_feed.next_batch():
        if not ret:
            break

        outs_fd, fd_inf_time = fd.predict(frame.copy(), prob)
        if len(outs_fd) == 0:
            continue
        start_point = outs_fd[0]
        end_point = outs_fd[1]
        cropped_face = crop_face(start_point, end_point, frame)

        # Predict facial landmarks on the cropped face
        outs_ld, ld_inf_time = ld.predict(cropped_face.copy())
        if len(outs_ld) == 0:
            continue

        # Translate landmark coordinates back into full-frame coordinates
        p1 = tuple(sum(x) for x in zip(outs_ld[0][0], start_point))
        p2 = tuple(sum(x) for x in zip(outs_ld[0][1], start_point))
        p3 = tuple(sum(x) for x in zip(outs_ld[0][2], start_point))
        p4 = tuple(sum(x) for x in zip(outs_ld[0][3], start_point))
        start_left_bb = tuple(sum(x) for x in zip(outs_ld[0][4], start_point))
        end_left_bb = tuple(sum(x) for x in zip(outs_ld[0][5], start_point))
        start_right_bb = tuple(sum(x) for x in zip(outs_ld[0][6], start_point))
        end_right_bb = tuple(sum(x) for x in zip(outs_ld[0][7], start_point))

        left_eye, right_eye = crop_eyes(
            frame.copy(),
            (start_left_bb, end_left_bb, start_right_bb, end_right_bb))

        # Pitch, roll and yaw estimation on the cropped face
        outs_hp, hp_inf_time = hp.predict(cropped_face.copy())
        p, r, y = outs_hp

        # Gaze estimation
        outs_gd, gd_inf_time = gd.predict(left_eye, right_eye, np.array([[y, p, r]]))

        # Record per-model inference times
        avg_inf_time["fd"].append(fd_inf_time)
        avg_inf_time["ld"].append(ld_inf_time)
        avg_inf_time["hp"].append(hp_inf_time)
        avg_inf_time["gd"].append(gd_inf_time)

        # Move the mouse pointer along the gaze vector
        if len(outs_gd) == 0:
            continue
        mc.move(outs_gd[0], outs_gd[1])

        if len(visualization_list) != 0:
            show_visualization(
                frame, visualization_list, start_point, end_point,
                (start_left_bb, end_left_bb, start_right_bb, end_right_bb),
                [p1, p2, p3, p4], (p, r, y), outs_gd)

        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    if len(avg_inf_time["fd"]) > 0:
        logging.debug(
            "Average inf. times are fd: {}, ld: {}, hp: {}, gd: {}".format(
                sum(avg_inf_time["fd"]) / len(avg_inf_time["fd"]),
                sum(avg_inf_time["ld"]) / len(avg_inf_time["ld"]),
                sum(avg_inf_time["hp"]) / len(avg_inf_time["hp"]),
                sum(avg_inf_time["gd"]) / len(avg_inf_time["gd"])))
        logging.debug(
            "Total inf. times are fd: {}, ld: {}, hp: {}, gd: {}".format(
                sum(avg_inf_time["fd"]), sum(avg_inf_time["ld"]),
                sum(avg_inf_time["hp"]), sum(avg_inf_time["gd"])))
        logging.debug(
            "FPS are fd: {}, ld: {}, hp: {}, gd: {}".format(
                1 / (sum(avg_inf_time["fd"]) / len(avg_inf_time["fd"])),
                1 / (sum(avg_inf_time["ld"]) / len(avg_inf_time["ld"])),
                1 / (sum(avg_inf_time["hp"]) / len(avg_inf_time["hp"])),
                1 / (sum(avg_inf_time["gd"]) / len(avg_inf_time["gd"]))))
    logging.info("Stream Ended")
    cv2.destroyAllWindows()
    input_feed.close()
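# The `main()` above calls `crop_face` and `crop_eyes` helpers that are not shown
# in this section. A minimal sketch of what they might look like, assuming each
# bounding-box point is an (x, y) pixel tuple; the project's actual helpers may differ.
def crop_face(start_point, end_point, frame):
    # Slice the face region out of the frame: rows index y, columns index x.
    return frame[start_point[1]:end_point[1], start_point[0]:end_point[0]]


def crop_eyes(frame, eye_boxes):
    # eye_boxes = (start_left, end_left, start_right, end_right) in full-frame coords.
    start_left, end_left, start_right, end_right = eye_boxes
    left_eye = frame[start_left[1]:end_left[1], start_left[0]:end_left[0]]
    right_eye = frame[start_right[1]:end_right[1], start_right[0]:end_right[0]]
    return left_eye, right_eye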
class Application:
    def __init__(self):
        self.args = None
        self.feed = None
        self.face_detection_model = None
        self.facial_landmark_detection_model = None
        self.gaze_estimation_model = None
        self.head_pose_estimation_model = None
        self.frame = None
        self.width = None
        self.height = None
        self.mc = MouseController("high", "fast")
        self.face_detection_load_time = 0
        self.facial_landmark_detection_load_time = 0
        self.gaze_estimation_load_time = 0
        self.head_pose_estimation_load_time = 0
        self.face_detection_infer_time = 0
        self.facial_landmark_detection_infer_time = 0
        self.gaze_estimation_infer_time = 0
        self.head_pose_estimation_infer_time = 0
        self.frames = 0

    def initialize_argparser(self):
        """
        Parse command line arguments.

        :return: command line arguments
        """
        parser = ArgumentParser()
        parser.add_argument("-t", "--input-type", required=True, type=str,
                            help="Type of input (video or cam)")
        parser.add_argument("-i", "--input", required=True, type=str,
                            help="Input file")
        parser.add_argument("-o", "--out", type=str, default=None,
                            help="Output file with the processed content")
        parser.add_argument("-p", "--preview", action='store_true', default=False,
                            help="Should preview face and eyes")
        parser.add_argument("--notmove", action='store_true', default=False,
                            help="Should not move mouse")
        parser.add_argument("-m", "--model", type=str, default="FP32",
                            help="Model precision to use. One of FP32, FP16 or FP16-INT8")
        parser.add_argument("-d", "--device", type=str, default="CPU",
                            help="Device used to process model. One of CPU or GPU")
        parser.add_argument("-v", "--verbose", action='store_true', default=False,
                            help="Enable DEBUG messages")
        self.args = parser.parse_args()

    def initialize_logging(self):
        if self.args.verbose:
            log.basicConfig(level=log.DEBUG)
        else:
            log.basicConfig(level=log.ERROR)

    def initialize_feed(self):
        self.feed = InputFeeder(self.args.input_type, self.args.input)
        self.feed.load_data()

    def initialize_window(self):
        if self.args.preview:
            cv2.namedWindow('preview')
            cv2.namedWindow('face')
            cv2.namedWindow('left eye')
            cv2.namedWindow('right eye')
            cv2.namedWindow('gaze')

    def show_main_frame(self):
        cv2.imshow('preview', self.frame)

    def esc_key_pressed(self):
        key_pressed = cv2.waitKey(1)
        if key_pressed == 27:
            return True

    def infer_face(self):
        start = time.time()
        face_frame = self.face_detection_model.predict(self.frame)
        self.face_detection_infer_time += time.time() - start
        return face_frame

    def infer_eyes(self, face_frame, show=False):
        start = time.time()
        left_eye_pos, right_eye_pos, left_eye, right_eye = \
            self.facial_landmark_detection_model.predict(face_frame)
        self.facial_landmark_detection_infer_time += time.time() - start

        if show:
            tmp_face = face_frame.copy()
            cv2.circle(tmp_face, (left_eye_pos[0], left_eye_pos[1]), 5, (0, 255, 0))
            cv2.circle(tmp_face, (right_eye_pos[0], right_eye_pos[1]), 5, (0, 255, 0))
            cv2.imshow('face', tmp_face)
            cv2.imshow('left eye', left_eye)
            cv2.imshow('right eye', right_eye)

        return left_eye, right_eye

    def infer_pose(self, face_frame, show=False):
        start = time.time()
        yaw, pitch, roll = self.head_pose_estimation_model.predict(face_frame)
        self.head_pose_estimation_infer_time += time.time() - start
        return yaw, pitch, roll

    def infer_gaze(self, cropped_left_eye, cropped_right_eye,
                   yaw, pitch, roll, show=False):
        start = time.time()
        gaze = self.gaze_estimation_model.predict(cropped_left_eye, cropped_right_eye,
                                                  yaw, pitch, roll)
        self.gaze_estimation_infer_time += time.time() - start

        if show:
            # Draw the gaze vector as an arrow on a white disc
            img = np.zeros([100, 100, 3], dtype=np.uint8)
            img.fill(255)
            cv2.circle(img, (50, 50), 50, (0, 255, 0))
            cv2.arrowedLine(img, (50, 50),
                            (50 + int(gaze[0] * 70), 50 + int(-gaze[1] * 70)),
                            (255, 0, 0), 2)
            cv2.imshow('gaze', img)

        return gaze

    def infer_frame(self):
        self.show_main_frame()
        if self.esc_key_pressed():
            return False
        self.frames += 1
        face_frame = self.infer_face()
        if face_frame is not None:
            cropped_left_eye, cropped_right_eye = self.infer_eyes(face_frame,
                                                                  self.args.preview)
            yaw, pitch, roll = self.infer_pose(face_frame, self.args.preview)
            gaze = self.infer_gaze(cropped_left_eye, cropped_right_eye,
                                   yaw, pitch, roll, self.args.preview)
            if not self.args.notmove:
                self.mc.move(gaze[0], gaze[1])

    def process_feed(self):
        try:
            for batch in self.feed.next_batch():
                self.frame = batch
                if batch is not None:
                    if self.infer_frame() is False:
                        break
                else:
                    break

            log.info("Face detection model load time: {:.2f}ms".format(
                1000 * self.face_detection_load_time))
            log.info("Facial landmark detection model load time: {:.2f}ms".format(
                1000 * self.facial_landmark_detection_load_time))
            log.info("Head pose estimation model load time: {:.2f}ms".format(
                1000 * self.head_pose_estimation_load_time))
            log.info("Gaze estimation model load time: {:.2f}ms".format(
                1000 * self.gaze_estimation_load_time))

            log.info("Face detection model inference mean time: {:.2f}ms".format(
                1000 * self.face_detection_infer_time / self.frames))
            log.info("Facial landmark detection model inference mean time: {:.2f}ms".format(
                1000 * self.facial_landmark_detection_infer_time / self.frames))
            log.info("Head pose estimation model inference mean time: {:.2f}ms".format(
                1000 * self.head_pose_estimation_infer_time / self.frames))
            log.info("Gaze estimation model inference mean time: {:.2f}ms".format(
                1000 * self.gaze_estimation_infer_time / self.frames))
        except Exception as err:
            log.error("Could not infer. Cause: %s", err)

    def initialize_models(self):
        try:
            model_precision = self.args.model.upper()

            # The binary face detection model ships only in FP32-INT1, so its path is fixed
            self.face_detection_model = Model_Face_Detection(
                "models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001")
            start = time.time()
            self.face_detection_model.load_model()
            self.face_detection_load_time = time.time() - start

            self.facial_landmark_detection_model = Model_Facial_Landmark_Detection(
                f"models/intel/landmarks-regression-retail-0009/{model_precision}/landmarks-regression-retail-0009",
                self.args.device.upper())
            start = time.time()
            self.facial_landmark_detection_model.load_model()
            self.facial_landmark_detection_load_time = time.time() - start

            self.head_pose_estimation_model = Model_Head_Pose_estimation(
                f"models/intel/head-pose-estimation-adas-0001/{model_precision}/head-pose-estimation-adas-0001",
                self.args.device.upper())
            start = time.time()
            self.head_pose_estimation_model.load_model()
            self.head_pose_estimation_load_time = time.time() - start

            self.gaze_estimation_model = Model_Gaze_Estimation(
                f"models/intel/gaze-estimation-adas-0002/{model_precision}/gaze-estimation-adas-0002",
                self.args.device.upper())
            start = time.time()
            self.gaze_estimation_model.load_model()
            self.gaze_estimation_load_time = time.time() - start
        except Exception as err:
            log.error("Could not load model. Cause: %s", err)

    def run(self):
        self.initialize_argparser()
        self.initialize_logging()
        self.initialize_models()
        self.initialize_feed()
        self.initialize_window()
        self.process_feed()
        self.feed.close()
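# A minimal entry point for the `Application` class above, assuming the module
# is executed directly; the project may wire this up differently.
if __name__ == '__main__':
    app = Application()
    app.run()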
def main():
    args = build_argparser().parse_args()
    previewFlags = args.previewFlags
    logger = logging.getLogger()
    inputFile = args.input

    if inputFile.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(inputFile):
            logger.error("Unable to find input file")
            exit(1)
        inputFeeder = InputFeeder("video", inputFile)

    mfd = Model_Face_Detection(args.facedetectionmodel, args.device, args.cpu_extension)
    mfld = Model_Facial_Landmarks_Detection(args.faciallandmarkmodel, args.device, args.cpu_extension)
    mge = Model_Gaze_Estimation(args.gazeestimationmodel, args.device, args.cpu_extension)
    mhpe = Model_Head_Pose_Estimation(args.headposemodel, args.device, args.cpu_extension)

    mc = MouseController('medium', 'fast')

    inputFeeder.load_data()
    mfd.load_model()
    mfld.load_model()
    mge.load_model()
    mhpe.load_model()

    frame_count = 0
    for ret, frame in inputFeeder.next_batch():
        if frame is None:
            continue
        frame_count += 1
        if frame_count % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))
        key = cv2.waitKey(60)

        croppedFace, face_coords = mfd.predict(frame.copy(), args.prob_threshold)
        if isinstance(croppedFace, int):
            logger.error("No face detected.")
            if key == 27:
                break
            continue

        hp_out = mhpe.predict(croppedFace.copy())
        left_eye, right_eye, eye_coords = mfld.predict(croppedFace.copy())
        new_mouse_coord, gaze_vector = mge.predict(left_eye, right_eye, hp_out)

        if len(previewFlags) != 0:
            preview_frame = frame.copy()
            if 'fd' in previewFlags:
                preview_frame = croppedFace
            if 'fld' in previewFlags:
                cv2.rectangle(croppedFace, (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10), (0, 255, 0), 3)
                cv2.rectangle(croppedFace, (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10), (0, 255, 0), 3)
            if 'hp' in previewFlags:
                cv2.putText(preview_frame,
                            "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".format(
                                hp_out[0], hp_out[1], hp_out[2]),
                            (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
            if 'ge' in previewFlags:
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] * 12), 160
                le = cv2.line(left_eye.copy(), (x - w, y - w), (x + w, y + w), (255, 0, 255), 2)
                cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                re = cv2.line(right_eye.copy(), (x - w, y - w), (x + w, y + w), (255, 0, 255), 2)
                cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                croppedFace[eye_coords[0][1]:eye_coords[0][3],
                            eye_coords[0][0]:eye_coords[0][2]] = le
                croppedFace[eye_coords[1][1]:eye_coords[1][3],
                            eye_coords[1][0]:eye_coords[1][2]] = re
            cv2.imshow("visualization", cv2.resize(preview_frame, (500, 500)))

        if frame_count % 5 == 0:
            mc.move(new_mouse_coord[0], new_mouse_coord[1])
        if key == 27:
            break

    logger.info("video ended...")
    cv2.destroyAllWindows()
    inputFeeder.close()
def run_app(args):
    face_detection_model = Model_Face_Detection(args.model_path_fd, args.device,
                                                args.cpu_extension, threshold=args.threshold)
    face_detection_model.load_model()
    head_pose_model = Model_Head_Pose_Estimation(args.model_path_hp, args.device,
                                                 args.cpu_extension)
    head_pose_model.load_model()
    face_landmark_model = Model_Facial_Landmarks(args.model_path_fl, args.device,
                                                 args.cpu_extension)
    face_landmark_model.load_model()
    gaze_model = Model_Gaze_Estimation(args.model_path_ge, args.device, args.cpu_extension)
    gaze_model.load_model()

    input_feeder = InputFeeder(args.input_type, args.input_file)
    input_feeder.load_data()

    mouse_controller = MouseController("medium", "fast")

    frame_count = 0
    custom = args.toggle
    for frame in input_feeder.next_batch():
        if frame is None:
            break
        key_pressed = cv2.waitKey(60)
        frame_count += 1

        face_out, cords = face_detection_model.predict(frame.copy())
        # When no face was detected
        if cords == 0:
            inf_info = "No Face Detected in the Frame"
            write_text_img(frame, inf_info, 400)
            continue

        eyes_cords, left_eye, right_eye = face_landmark_model.predict(face_out.copy())
        head_pose_out = head_pose_model.predict(face_out.copy())
        gaze_out = gaze_model.predict(left_eye, right_eye, head_pose_out)
        # Failure in processing both eyes
        if gaze_out is None:
            continue

        x, y = gaze_out
        if frame_count % 5 == 0:
            mouse_controller.move(x, y)

        inf_info = "Head Pose (y: {:.2f}, p: {:.2f}, r: {:.2f})".format(
            head_pose_out[0], head_pose_out[1], head_pose_out[2])

        # Process visualization
        if 'frame' in custom:
            visualization(frame, cords, face_out, eyes_cords)
        if 'stats' in custom:
            write_text_img(face_out, inf_info, 400)
            inf_info = "Gaze Angle: x: {:.2f}, y: {:.2f}".format(x, y)
            log.info("Statistic " + inf_info)
            write_text_img(face_out, inf_info, 400, 15)
        if 'gaze' in custom:
            display_head_pose(frame, head_pose_out, cords)

        out_f = np.hstack((cv2.resize(frame, (400, 400)),
                           cv2.resize(face_out, (400, 400))))
        cv2.imshow('Visualization', out_f)

        if key_pressed == 27:
            break

    input_feeder.close()
    cv2.destroyAllWindows()
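# A minimal sketch of an argument parser matching the flags `run_app` consumes
# (model_path_fd/hp/fl/ge, device, cpu_extension, threshold, input_type,
# input_file, toggle). The defaults and help texts here are assumptions, not
# the project's actual CLI.
from argparse import ArgumentParser


def build_run_app_parser():
    parser = ArgumentParser()
    parser.add_argument("--model_path_fd", required=True, help="Face detection model path")
    parser.add_argument("--model_path_hp", required=True, help="Head pose estimation model path")
    parser.add_argument("--model_path_fl", required=True, help="Facial landmarks model path")
    parser.add_argument("--model_path_ge", required=True, help="Gaze estimation model path")
    parser.add_argument("--device", default="CPU", help="Target device (CPU, GPU, ...)")
    parser.add_argument("--cpu_extension", default=None, help="Optional CPU extension library")
    parser.add_argument("--threshold", type=float, default=0.5, help="Detection confidence threshold")
    parser.add_argument("--input_type", default="video", help="Input type (video or cam)")
    parser.add_argument("--input_file", default=None, help="Path to the input video file")
    parser.add_argument("--toggle", nargs='*', default=[],
                        help="Visualization toggles: any of frame, stats, gaze")
    return parser


# Usage (sketch): run_app(build_run_app_parser().parse_args())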