def main():
    """Run the gaze-estimation pipeline on a video file or webcam stream.

    NOTE(review): this definition is shadowed by a later ``def main()`` in
    this file, so it is dead code unless invoked before the redefinition.

    Reads frames from the input, detects the first face, estimates head
    pose, locates the eyes and feeds them to the gaze model.  Writes an
    annotated video plus timing stats to ``args.output_dir``.

    :return: None
    """
    global INFO
    global DELAY
    global POSE_CHECKED

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        # BUGFIX: only a file path can be existence-checked; previously the
        # assert also ran for 'cam' input and always failed.
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    out = cv2.VideoWriter(os.path.join(args.output_dir, "shopper.mp4"),
                          cv2.VideoWriter_fourcc(*"MP4V"), fps,
                          (initial_w, initial_h), True)

    frame_count = 0
    job_id = 1  # os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
        # Adjust DELAY to match the FPS of the video file.
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)
    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    # Initialise the models (with a CPU extension when one is supplied).
    if args.cpu_extension:
        facedet = FaceDetection(args.facemodel, args.confidence,
                                extensions=args.cpu_extension)
        posest = HeadPoseEstimation(args.posemodel, args.confidence,
                                    extensions=args.cpu_extension)
        landest = FaceLandmarksDetection(args.landmarksmodel, args.confidence,
                                         extensions=args.cpu_extension)
        gazeest = GazeEstimation(args.gazemodel, args.confidence,
                                 extensions=args.cpu_extension)
    else:
        facedet = FaceDetection(args.facemodel, args.confidence)
        posest = HeadPoseEstimation(args.posemodel, args.confidence)
        landest = FaceLandmarksDetection(args.landmarksmodel, args.confidence)
        gazeest = GazeEstimation(args.gazemodel, args.confidence)

    # Load the networks to the IE plugin to get the shape of the input layer.
    facedet.load_model()
    posest.load_model()
    landest.load_model()
    gazeest.load_model()
    print("loaded models")

    # BUGFIX: the original read one frame before the loop and immediately
    # re-read at the top of the loop body, silently dropping the first frame.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("checkpoint *BREAKING")
            break
        if frame is None:
            log.error("checkpoint ERROR! blank FRAME grabbed")
            break
        frame_count += 1
        POSE_CHECKED = False

        initial_w = int(cap.get(3))
        initial_h = int(cap.get(4))

        # Face detection: results of the output layer of the network.
        inf_start_fd = time.time()
        coords, frame = facedet.predict(frame)
        det_time_fd = time.time() - inf_start_fd

        if len(coords) > 0:
            [xmin, ymin, xmax, ymax] = coords[0]  # only the first face
            head_pose = frame[ymin:ymax, xmin:xmax]

            inf_start_hp = time.time()
            is_looking, pose_angles = posest.predict(head_pose)
            if is_looking:
                det_time_hp = time.time() - inf_start_hp
                POSE_CHECKED = True

                inf_start_lm = time.time()
                coords, f = landest.predict(head_pose)
                frame[ymin:ymax, xmin:xmax] = f
                det_time_lm = time.time() - inf_start_lm

                [[xlmin, ylmin, xlmax, ylmax],
                 [xrmin, yrmin, xrmax, yrmax]] = coords
                # BUGFIX: the landmark coords are relative to the face crop,
                # so the eye patches must be cut from the crop ``f``, not from
                # the full frame.
                left_eye_image = f[ylmin:ylmax, xlmin:xlmax]
                right_eye_image = f[yrmin:yrmax, xrmin:xrmax]
                output = gazeest.predict(left_eye_image, right_eye_image,
                                         pose_angles)

        # Draw performance stats.
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)
        if POSE_CHECKED:
            cv2.putText(frame,
                        "Head pose Inference time: {:.3f} ms.".format(
                            det_time_hp * 1000),
                        (0, 35), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (0, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        out.write(frame)
        print("frame", frame_count)
        if frame_count % 10 == 0:
            print(time.time() - infer_time_start)
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start),
                           frame_count, video_len)

    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')

    facedet.clean()
    posest.clean()
    landest.clean()
    gazeest.clean()
    out.release()
    cap.release()
    cv2.destroyAllWindows()
def main():
    """Run the gaze-estimation pipeline with optional async inference.

    Detects the first face per frame, estimates head pose, locates the
    eyes, runs gaze estimation, and writes the annotated video (plus
    per-model intermediate videos when ``--write_intermediate yes``) and
    timing stats to ``args.output_dir``.

    :return: None
    """
    global POSE_CHECKED

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        # BUGFIX: only a file path can be existence-checked; previously the
        # assert also ran for 'cam' input and always failed.
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    out = cv2.VideoWriter(os.path.join(args.output_dir, "output.mp4"),
                          cv2.VideoWriter_fourcc(*"MP4V"), fps,
                          (initial_w, initial_h), True)
    if args.write_intermediate == 'yes':
        # One extra writer per model stage for debugging visualisations.
        out_fm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_fm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps,
            (initial_w, initial_h), True)
        out_lm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_lm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps,
            (initial_w, initial_h), True)
        out_pm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_pm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps,
            (initial_w, initial_h), True)
        out_gm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_gm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps,
            (initial_w, initial_h), True)

    frame_count = 0
    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    async_mode = args.mode != 'sync'

    # Initialise the models (with a CPU extension when one is supplied).
    if args.cpu_extension:
        face_det = FaceDetection(args.facemodel, args.confidence,
                                 extensions=args.cpu_extension,
                                 async_mode=async_mode)
        pose_det = HeadPoseEstimation(args.posemodel, args.confidence,
                                      extensions=args.cpu_extension,
                                      async_mode=async_mode)
        land_det = FaceLandmarksDetection(args.landmarksmodel,
                                          args.confidence,
                                          extensions=args.cpu_extension,
                                          async_mode=async_mode)
        gaze_est = GazeEstimation(args.gazemodel, args.confidence,
                                  extensions=args.cpu_extension,
                                  async_mode=async_mode)
    else:
        face_det = FaceDetection(args.facemodel, args.confidence,
                                 async_mode=async_mode)
        pose_det = HeadPoseEstimation(args.posemodel, args.confidence,
                                      async_mode=async_mode)
        land_det = FaceLandmarksDetection(args.landmarksmodel,
                                          args.confidence,
                                          async_mode=async_mode)
        gaze_est = GazeEstimation(args.gazemodel, args.confidence,
                                  async_mode=async_mode)

    # Load the networks to the IE plugin to get the shape of the input layer.
    face_det.load_model()
    pose_det.load_model()
    land_det.load_model()
    gaze_est.load_model()
    model_load_time = time.time() - infer_time_start
    print("All models are loaded successfully")
    # NOTE(review): the original wrapped nothing in `try: pass / except`,
    # which could never catch an inference error; the dead block is removed.

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("checkpoint *BREAKING")
            break
        if frame is None:
            log.error("checkpoint ERROR! blank FRAME grabbed")
            break
        frame_count += 1
        POSE_CHECKED = False

        initial_w = int(cap.get(3))
        initial_h = int(cap.get(4))

        # Face detection: results of the output layer of the network.
        inf_start_fd = time.time()
        coords, frame = face_det.predict(frame)
        if args.write_intermediate == 'yes':
            out_fm.write(frame)
        det_time_fd = time.time() - inf_start_fd

        if len(coords) > 0:
            [xmin, ymin, xmax, ymax] = coords[0]  # only the first face
            head_pose = frame[ymin:ymax, xmin:xmax]

            inf_start_hp = time.time()
            is_looking, pose_angles = pose_det.predict(head_pose)
            if args.write_intermediate == 'yes':
                p = "Pose Angles {}, is Looking? {}".format(pose_angles,
                                                            is_looking)
                cv2.putText(frame, p, (50, 15), cv2.FONT_HERSHEY_COMPLEX,
                            0.5, (255, 0, 0), 1)
                out_pm.write(frame)

            if is_looking:
                det_time_hp = time.time() - inf_start_hp
                POSE_CHECKED = True

                inf_start_lm = time.time()
                coords, f = land_det.predict(head_pose)
                frame[ymin:ymax, xmin:xmax] = f
                if args.write_intermediate == "yes":
                    out_lm.write(frame)
                det_time_lm = time.time() - inf_start_lm

                [[xlmin, ylmin, xlmax, ylmax],
                 [xrmin, yrmin, xrmax, yrmax]] = coords
                left_eye_image = f[ylmin:ylmax, xlmin:xlmax]
                right_eye_image = f[yrmin:yrmax, xrmin:xrmax]
                output, gaze_vector = gaze_est.predict(left_eye_image,
                                                       right_eye_image,
                                                       pose_angles)

                if args.write_intermediate == 'yes':
                    p = "Gaze Vector {}".format(gaze_vector)
                    cv2.putText(frame, p, (50, 15),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                (255, 0, 0), 1)
                    fl = draw_gaze(left_eye_image, gaze_vector)
                    fr = draw_gaze(right_eye_image, gaze_vector)
                    f[ylmin:ylmax, xlmin:xlmax] = fl
                    f[yrmin:yrmax, xrmin:xrmax] = fr
                    # BUGFIX: ``f`` was copied into ``frame`` before the gaze
                    # overlays were drawn, so out_gm recorded frames without
                    # them; copy the annotated crop back first.
                    frame[ymin:ymax, xmin:xmax] = f
                    out_gm.write(frame)

        # Draw performance stats.
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)
        # BUGFIX: the guard below was commented out, so det_time_hp was
        # referenced before assignment (NameError) on any frame processed
        # before the first "looking" face.
        if POSE_CHECKED:
            cv2.putText(frame,
                        "Head pose Inference time: {:.3f} ms.".format(
                            det_time_hp * 1000),
                        (0, 35), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (0, 255, 0), 1)
        cv2.putText(frame, inf_time_message, (0, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 1)
        out.write(frame)
        if frame_count % 10 == 0:
            print("Inference time = ", int(time.time() - infer_time_start))
            print('Frame count {} and vidoe len {}'.format(frame_count,
                                                           video_len))

    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')
    if args.output_dir:
        with open(os.path.join(args.output_dir, 'stats.txt'), 'a') as f:
            f.write(str(round(model_load_time)) + '\n')

    # Clean all models.
    face_det.clean()
    pose_det.clean()
    land_det.clean()
    gaze_est.clean()
    # Release the cv2 capture.
    cap.release()
    cv2.destroyAllWindows()
    # Release all writers.
    out.release()
    if args.write_intermediate == 'yes':
        out_fm.release()
        out_pm.release()
        out_lm.release()
        out_gm.release()