Example #1
    def compute_eye_gaze_estimation(self, subject_id, timestamp, input_r,
                                    input_l):
        """
        subject_id : integer,  id of the subject
        input_x    : cv_image, input image of x eye
        (phi_x)    : double,   phi angle estimated using pupil detection
        (theta_x)  : double,   theta angle estimated using pupil detection
        """
        try:
            lct = self.tf_listener.getLatestCommonTime(
                self.rgb_frame_id_ros, self.headpose_frame + str(subject_id))
            if (timestamp - lct).to_sec() < 0.25:
                tqdm.write('Time diff: ' + str((timestamp - lct).to_sec()))

                (trans_head, rot_head) = self.tf_listener.lookupTransform(
                    self.rgb_frame_id_ros,
                    self.headpose_frame + str(subject_id), lct)
                euler_angles_head = gaze_tools.get_head_pose(
                    trans_head, rot_head)

                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(
                    euler_angles_head)
                self.last_phi_head, self.last_theta_head = phi_head, theta_head
            else:
                if self.use_last_headpose and self.last_phi_head is not None:
                    tqdm.write('Big time diff, use last known headpose! ' +
                               str((timestamp - lct).to_sec()))
                    phi_head, theta_head = self.last_phi_head, self.last_theta_head
                else:
                    tqdm.write(
                        'Time diff too big for head pose, not estimating gaze! '
                        + str((timestamp - lct).to_sec()))
                    return

            start_time = time.time()

            est_gaze_c = self.estimate_gaze_twoeyes(
                input_l, input_r, np.array([theta_head, phi_head]))

            self.gaze_buffer_c[subject_id].append(est_gaze_c)

            if len(self.average_weights) == len(
                    self.gaze_buffer_c[subject_id]):
                est_gaze_c_med = np.average(np.array(
                    self.gaze_buffer_c[subject_id]),
                                            axis=0,
                                            weights=self.average_weights)
                self.publish_gaze(est_gaze_c_med, timestamp, subject_id)
                tqdm.write('est_gaze_c: ' + str(est_gaze_c_med))
                return est_gaze_c_med

            tqdm.write('Elapsed: ' + str(time.time() - start_time))
        except (tf.LookupException, tf.ConnectivityException,
                tf.ExtrapolationException, tf.Exception) as tf_e:
            print(tf_e)
        except rospy.ROSException as ros_e:
            if str(ros_e) == "publish() to a closed topic":
                print("See ya")
        return None
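
The weighted buffer above is the interesting part: raw per-frame gaze estimates are appended to a per-subject buffer and only published once the buffer is as long as self.average_weights, at which point a weighted average is returned. Below is a minimal standalone sketch of that smoothing idea; the buffer length, weights and sample values are made up for illustration.

from collections import deque

import numpy as np

# Illustrative smoothing parameters: newer samples get larger weights.
AVERAGE_WEIGHTS = np.array([0.1, 0.2, 0.3, 0.4])
gaze_buffer = deque(maxlen=len(AVERAGE_WEIGHTS))


def smooth_gaze(raw_gaze):
    """Append a raw [theta, phi] estimate and return the weighted average
    once the buffer is full, otherwise None."""
    gaze_buffer.append(raw_gaze)
    if len(gaze_buffer) < len(AVERAGE_WEIGHTS):
        return None
    return np.average(np.array(gaze_buffer), axis=0, weights=AVERAGE_WEIGHTS)


# Usage: feed successive estimates; the fourth call yields a smoothed value.
for sample in ([0.10, 0.02], [0.12, 0.01], [0.11, 0.03], [0.13, 0.02]):
    smoothed = smooth_gaze(np.array(sample))
    if smoothed is not None:
        print(smoothed)
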
Example #2
    def image_callback(self, subject_image_list):
        """This method is called whenever new input arrives. The input is first converted in a format suitable
        for the gaze estimation network (see :meth:`input_from_image`), then the gaze is estimated (see
        :meth:`estimate_gaze`. The estimated gaze is overlaid on the input image (see :meth:`visualize_eye_result`),
        and this image is published along with the estimated gaze vector (see :meth:`publish_image` and
        :func:`publish_gaze`)"""
        timestamp = subject_image_list.header.stamp

        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        input_r_list = []
        input_l_list = []
        input_head_list = []
        valid_subject_list = []
        for subject_id, s in subjects_dict.items():
            try:
                transform_msg = self.tf2_buffer.lookup_transform(self.ros_tf_frame, self.headpose_frame + str(subject_id), timestamp)
                rot_head = transform_msg.transform.rotation
                euler_angles_head = list(transformations.euler_from_quaternion([rot_head.x, rot_head.y, rot_head.z, rot_head.w]))
                euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                input_head_list.append([theta_head, phi_head])
                input_r_list.append(self.input_from_image(s.right))
                input_l_list.append(self.input_from_image(s.left))
                valid_subject_list.append(subject_id)
            except (tf2_ros.LookupException, tf2_ros.ConnectivityException, tf2_ros.ExtrapolationException, tf2_ros.TransformException):
                pass

        if len(valid_subject_list) == 0:
            return

        gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                              inference_input_right_list=input_r_list,
                                              inference_headpose_list=input_head_list)

        subjects_gaze_img_list = []
        for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
            self.publish_gaze(gaze, timestamp, subject_id)

            if self.visualise_eyepose:
                s = subjects_dict[subject_id]
                r_gaze_img = self.visualize_eye_result(s.right, gaze)
                l_gaze_img = self.visualize_eye_result(s.left, gaze)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                subjects_gaze_img_list.append(s_gaze_img)

        if len(subjects_gaze_img_list) > 0:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
            gaze_img_msg.header.stamp = timestamp
            self.subjects_gaze_img.publish(gaze_img_msg)

        _now = rospy.Time().now()
        _freq = 1.0 / (_now - self._last_time).to_sec()
        self._freq_deque.append(_freq)
        self._latency_deque.append(_now.to_sec() - timestamp.to_sec())
        self._last_time = _now
        tqdm.write(
            '\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} latency: {:.2f}s for {} subject(s) {:.0f}Hz\033[0m'.format(
                (_now.to_sec()), timestamp.to_sec(), np.mean(self._latency_deque), len(valid_subject_list), np.mean(self._freq_deque)), end="\r")
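
Before the batch call to estimate_gaze_twoeyes, each subject's head-pose transform is reduced to a [theta, phi] pair: quaternion to Euler angles, then limit_yaw and get_phi_theta_from_euler. Those helpers live in the project's gaze_tools module and are not shown here; the sketch below only reproduces the idea, using scipy instead of the tf transformations module and an assumed mapping from (roll, pitch, yaw) to (theta, phi).

import numpy as np
from scipy.spatial.transform import Rotation


def head_input_from_quaternion(qx, qy, qz, qw):
    """Turn a head-pose quaternion into a [theta, phi] pair for the gaze
    network. The sign/axis convention here is an assumption, not necessarily
    the project's."""
    roll, pitch, yaw = Rotation.from_quat([qx, qy, qz, qw]).as_euler("xyz")
    # Keep yaw inside [-pi, pi) so a wrapped transform does not flip the sign.
    yaw = (yaw + np.pi) % (2.0 * np.pi) - np.pi
    return [-pitch, -yaw]  # assumed: theta from pitch, phi from yaw


# Usage with an identity rotation (head looking straight at the camera).
print(head_input_from_quaternion(0.0, 0.0, 0.0, 1.0))
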
Example #3
    def process_image(self, color_msg):
        color_img = gaze_tools.convert_image(color_msg, "bgr8")
        timestamp = color_msg.header.stamp

        self.update_subject_tracker(color_img)

        if not self.subject_tracker.get_tracked_elements():
            tqdm.write("\033[2K\033[1;31mNo face found\033[0m", end="\r")
            return

        self.subject_tracker.update_eye_images(self.eye_image_size)

        final_head_pose_images = []
        for subject_id, subject in self.subject_tracker.get_tracked_elements(
        ).items():
            if subject.left_eye_color is None or subject.right_eye_color is None:
                continue
            if subject_id not in self.pose_stabilizers:
                self.pose_stabilizers[subject_id] = [
                    Stabilizer(state_num=2,
                               measure_num=1,
                               cov_process=0.1,
                               cov_measure=0.1) for _ in range(6)
                ]

            success, head_rpy, translation_vector = self.get_head_pose(
                subject.marks, subject_id)

            if success:
                # Publish all the data
                self.publish_pose(timestamp, translation_vector, head_rpy,
                                  subject_id)

                if self.visualise_headpose:
                    roll_pitch_yaw = gaze_tools.limit_yaw(head_rpy)
                    face_image_resized = cv2.resize(
                        subject.face_color,
                        dsize=(224, 224),
                        interpolation=cv2.INTER_CUBIC)

                    final_head_pose_images.append(
                        LandmarkMethodROS.visualize_headpose_result(
                            face_image_resized,
                            gaze_tools.get_phi_theta_from_euler(
                                roll_pitch_yaw)))

        if len(self.subject_tracker.get_tracked_elements().items()) > 0:
            self.publish_subject_list(
                timestamp, self.subject_tracker.get_tracked_elements())

        if len(final_head_pose_images) > 0:
            headpose_image_ros = self.bridge.cv2_to_imgmsg(
                np.hstack(final_head_pose_images), "bgr8")
            headpose_image_ros.header.stamp = timestamp
            self.subject_faces_pub.publish(headpose_image_ros)
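
The six Stabilizer instances (one per head-pose scalar) come from elsewhere in the project and their API is not shown above. The sketch below illustrates the same idea with a tiny constant-velocity Kalman filter per scalar, written directly in numpy; the noise parameters are illustrative, not the project's values.

import numpy as np


class ScalarStabilizer:
    """Tiny constant-velocity Kalman filter for one pose scalar
    (state = [value, velocity], measurement = value)."""

    def __init__(self, cov_process=0.1, cov_measure=0.1):
        self.x = np.zeros((2, 1))                    # state estimate
        self.P = np.eye(2)                           # state covariance
        self.F = np.array([[1.0, 1.0], [0.0, 1.0]])  # state transition
        self.H = np.array([[1.0, 0.0]])              # measurement model
        self.Q = cov_process * np.eye(2)             # process noise
        self.R = np.array([[cov_measure]])           # measurement noise

    def update(self, measurement):
        # Predict.
        self.x = self.F @ self.x
        self.P = self.F @ self.P @ self.F.T + self.Q
        # Correct with the new measurement.
        y = np.array([[measurement]]) - self.H @ self.x
        S = self.H @ self.P @ self.H.T + self.R
        K = self.P @ self.H.T @ np.linalg.inv(S)
        self.x = self.x + K @ y
        self.P = (np.eye(2) - K @ self.H) @ self.P
        return float(self.x[0, 0])


# One filter per pose scalar (roll, pitch, yaw, x, y, z), as in the example.
stabilizers = [ScalarStabilizer() for _ in range(6)]
noisy_pose = [0.1, -0.05, 0.02, 0.3, -0.1, 1.2]
smoothed_pose = [s.update(v) for s, v in zip(stabilizers, noisy_pose)]
print(smoothed_pose)
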
Example #4
    def image_callback(self, subject_image_list):
        """This method is called whenever new input arrives. The input is first converted in a format suitable
        for the gaze estimation network (see :meth:`input_from_image`), then the gaze is estimated (see
        :meth:`estimate_gaze`. The estimated gaze is overlaid on the input image (see :meth:`visualize_eye_result`),
        and this image is published along with the estimated gaze vector (see :meth:`publish_image` and
        :func:`publish_gaze`)"""
        timestamp = subject_image_list.header.stamp

        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        input_r_list = []
        input_l_list = []
        input_head_list = []
        valid_subject_list = []
        for subject_id, s in subjects_dict.items():
            try:
                (trans_head, rot_head) = self.tf_listener.lookupTransform(self.ros_tf_frame, self.headpose_frame + str(subject_id), timestamp)
                euler_angles_head = list(tf.transformations.euler_from_quaternion(rot_head))
                euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                input_head_list.append([theta_head, phi_head])
                input_r_list.append(self.input_from_image(s.right))
                input_l_list.append(self.input_from_image(s.left))
                valid_subject_list.append(subject_id)
            except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException, tf.Exception):
                pass

        if len(valid_subject_list) == 0:
            return

        gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                              inference_input_right_list=input_r_list,
                                              inference_headpose_list=input_head_list)

        subjects_gaze_img_list = []
        for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
            self.publish_gaze(gaze, timestamp, subject_id)

            if self.visualise_eyepose:
                s = subjects_dict[subject_id]
                r_gaze_img = self.visualize_eye_result(s.right, gaze)
                l_gaze_img = self.visualize_eye_result(s.left, gaze)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                subjects_gaze_img_list.append(s_gaze_img)

        if len(subjects_gaze_img_list) > 0:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
            gaze_img_msg.header.stamp = timestamp
            self.subjects_gaze_img.publish(gaze_img_msg)
Example #5
    def process_image(self, color_msg):
        tqdm.write('Time now: ' + str(rospy.Time.now().to_sec()) +
                   ' message color: ' + str(color_msg.header.stamp.to_sec()) +
                   ' diff: ' + str(rospy.Time.now().to_sec() -
                                   color_msg.header.stamp.to_sec()))

        start_time = time.time()

        color_img = gaze_tools.convert_image(color_msg, "bgr8")
        timestamp = color_msg.header.stamp

        self.detect_landmarks(color_img, timestamp)  # update self.subjects

        tqdm.write('Elapsed after detecting transformed_landmarks: ' +
                   str(time.time() - start_time))

        if not self.subjects:
            tqdm.write("No face found")
            return

        self.get_eye_image()  # update self.subjects

        final_head_pose_images = None
        for subject_id, subject in self.subjects.items():
            if subject.left_eye_color is None or subject.right_eye_color is None:
                continue
            if subject_id not in self.last_rvec:
                self.last_rvec[subject_id] = np.array([[0.01891013],
                                                       [0.08560084],
                                                       [-3.14392813]])
            if subject_id not in self.last_tvec:
                self.last_tvec[subject_id] = np.array([[-14.97821226],
                                                       [-10.62040383],
                                                       [-2053.03596872]])
            if subject_id not in self.pose_stabilizers:
                self.pose_stabilizers[subject_id] = [
                    Stabilizer(state_num=2,
                               measure_num=1,
                               cov_process=0.1,
                               cov_measure=0.1) for _ in range(6)
                ]

            success, rotation_vector, translation_vector = self.get_head_pose(
                subject.marks, subject_id)

            # Publish all the data
            translation_headpose_tf = self.get_head_translation(
                timestamp, subject_id)

            if success:
                if translation_headpose_tf is not None:
                    euler_angles_head = self.publish_pose(
                        timestamp, translation_headpose_tf, rotation_vector,
                        subject_id)

                    if euler_angles_head is not None:
                        head_pose_image = self.visualize_headpose_result(
                            subject.face_color,
                            gaze_tools.get_phi_theta_from_euler(
                                euler_angles_head))
                        head_pose_image_resized = cv2.resize(
                            head_pose_image,
                            dsize=(224, 224),
                            interpolation=cv2.INTER_CUBIC)

                        if final_head_pose_images is None:
                            final_head_pose_images = head_pose_image_resized
                        else:
                            final_head_pose_images = np.concatenate(
                                (final_head_pose_images,
                                 head_pose_image_resized),
                                axis=1)
            else:
                tqdm.write("Could not get head pose properly")

        if final_head_pose_images is not None:
            self.publish_subject_list(timestamp, self.subjects)
            headpose_image_ros = self.bridge.cv2_to_imgmsg(
                final_head_pose_images, "bgr8")
            headpose_image_ros.header.stamp = timestamp
            self.subject_faces_pub.publish(headpose_image_ros)

        tqdm.write('Elapsed total: ' + str(time.time() - start_time) + '\n\n')
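
Example #5 caches last_rvec / last_tvec per subject before calling get_head_pose. One plausible use of such a cache (an assumption here, since get_head_pose itself is not shown) is warm-starting cv2.solvePnP with the previous frame's solution. A sketch of that pattern, with placeholder 3D model points and 2D landmarks:

import cv2
import numpy as np

# Placeholder 3D facial model points and matching 2D landmark detections.
model_points = np.random.rand(68, 3).astype(np.float64)
image_points = (np.random.rand(68, 1, 2) * 480.0).astype(np.float64)
camera_matrix = np.array([[500.0, 0.0, 320.0],
                          [0.0, 500.0, 240.0],
                          [0.0, 0.0, 1.0]])
dist_coeffs = np.zeros((4, 1))

# Initial guess carried over from the previous frame (as last_rvec/last_tvec above).
last_rvec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
last_tvec = np.array([[-14.97821226], [-10.62040383], [-2053.03596872]])

success, rvec, tvec = cv2.solvePnP(model_points, image_points,
                                   camera_matrix, dist_coeffs,
                                   rvec=last_rvec, tvec=last_tvec,
                                   useExtrinsicGuess=True,
                                   flags=cv2.SOLVEPNP_ITERATIVE)
if success:
    last_rvec, last_tvec = rvec, tvec  # reuse as the next frame's guess
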
Example #6
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(
        color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(
            landmark_estimator.model_points,
            subject.landmarks.reshape(len(subject.landmarks), 1, 2),
            cameraMatrix=camera_matrix,
            distCoeffs=dist_coefficients,
            flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write(
                'Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(
            _rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0], [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color,
                                        dsize=(224, 224),
                                        interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(
            face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(
                os.path.join(args.output_path,
                             os.path.splitext(base_name)[0] + '_headpose.jpg'),
                head_pose_image)

        input_r_list.append(
            gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(
            gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(
        inference_input_left_list=input_l_list,
        inference_input_right_list=input_r_list,
        inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list,
                                          gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(
            subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(
            subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(
                os.path.join(args.output_path,
                             os.path.splitext(base_name)[0] + '_gaze.jpg'),
                s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args.save_estimate:
            with open(
                    os.path.join(
                        args.output_path,
                        os.path.splitext(base_name)[0] + '_output.txt'),
                    'w+') as f:
                f.write(
                    os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) +
                    ', ' + str(headpose[0]) + ']' + ', [' + str(gaze[1]) +
                    ', ' + str(gaze[0]) + ']' + '\n')
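
The head-pose branch above converts the solvePnP rotation vector into roll/pitch/yaw in ROS conventions: cv2.Rodrigues to a rotation matrix, an axis permutation, then an Euler extraction after the camera-to-ROS change of basis. Below is a self-contained sketch of that chain; it uses scipy's extrinsic 'xyz' Euler order as a stand-in for euler_from_matrix from tf.transformations (default 'sxyz'), which is an assumed equivalence, and only the 3x3 rotation part since that is all the Euler extraction needs.

import cv2
import numpy as np
from scipy.spatial.transform import Rotation

# Example rotation vector as returned by cv2.solvePnP.
rotation_vector = np.array([[0.05], [-0.10], [3.10]])

rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
# Re-order the axes of the head frame (same permutation as in the example).
rotation_matrix = rotation_matrix @ np.array([[0, 1, 0],
                                              [0, 0, -1],
                                              [-1, 0, 0]])

# Change of basis from the camera optical frame to the ROS body frame
# (x forward, y left, z up).
camera_to_ros = np.array([[0.0, 0.0, 1.0],
                          [-1.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0]])

roll, pitch, yaw = Rotation.from_matrix(camera_to_ros @ rotation_matrix).as_euler("xyz")
print(roll, pitch, yaw)
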
Example #7
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.marks.reshape(len(subject.marks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        roll_pitch_yaw = [-rotation_vector[2], -rotation_vector[0], rotation_vector[1] + np.pi]
        roll_pitch_yaw = limit_yaw(np.array(roll_pitch_yaw).flatten().tolist())

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_headpose.jpg'), head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
Example #8
                if args.eye_gaze == 1:
                    _rotation_matrix = np.matmul(rmat, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
                    _m = np.zeros((4, 4))
                    _m[:3, :3] = _rotation_matrix
                    _m[3, 3] = 1
                    # Go from camera space to ROS space
                    _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                                    [-1.0, 0.0, 0.0, 0.0],
                                    [0.0, -1.0, 0.0, 0.0],
                                    [0.0, 0.0, 0.0, 1.0]]

                    roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
                    roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

                    phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)


                    # le_c, re_c, _, _ = get_eye_image_from_landmarks(f.bbox, get_image_from_bb(frame, f.bbox), f.lms, (60,36))
                    re_c = get_eye_area(eye_gaze_frame, [f.lms[36], f.lms[37], f.lms[38], f.lms[39], f.lms[40], f.lms[41]])
                    le_c = get_eye_area(eye_gaze_frame, [f.lms[42], f.lms[43], f.lms[44], f.lms[45], f.lms[46], f.lms[47]])

                    r_eye_roi_resize.append(input_from_image(re_c))
                    l_eye_roi_resize.append(input_from_image(le_c))
                    head_list.append([phi_head, theta_head])

                #########
                frame_count += 1
                if log is not None:
                    log.write(f"{frame_count},{now},{width},{height},{args.fps},{face_num},{f.id},{f.eye_blink[0]},{f.eye_blink[1]},{f.conf},{f.success},{f.pnp_error},{f.quaternion[0]},{f.quaternion[1]},{f.quaternion[2]},{f.quaternion[3]},{f.euler[0]},{f.euler[1]},{f.euler[2]},{f.rotation[0]},{f.rotation[1]},{f.rotation[2]},{f.translation[0]},{f.translation[1]},{f.translation[2]}")
                for (x,y,c) in f.lms:
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix, label):
    global FPS


    cv2.putText(color_img, "FPS : " + FPS, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    start = time.time()
    # Returns the face box position for every detected subject: [left_x, top_y, right_x, bottom_y]
    faceboxes = landmark_estimator.get_face_bb(color_img)

    # Draw the face bounding boxes
    for facebox in faceboxes:
        left = int(facebox[0])
        top = int(facebox[1])
        right = int(facebox[2])
        bottom = int(facebox[3])
        cv2.rectangle(color_img, (left, top), (right, bottom), (0, 255, 0), 2)


    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        # if EVAL and not SHIFT:
            # head_phi.append(-100)
            # head_theta.append(-100)
            # gaze_phi.append(-100)
            # gaze_theta.append(-100)
        return

    # check_people(faceboxes)

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    people_count = 1
    frame_img = color_img

    for idx, subject in enumerate(subjects):
        people_idx = get_people(faceboxes[idx][0])
        people_list[people_idx].set_bbox_l(faceboxes[idx][0])

        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)
        # if EVAL:
            # head_phi.append(phi_head)
            # head_theta.append(theta_head)
        # face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        # head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        # Draw the head-pose vector on the face box in color_img.
        if EVAL:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img,faceboxes[idx], (phi_head, theta_head), people_list[people_idx],label)
        else:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img,faceboxes[idx], (phi_head, theta_head), people_list[people_idx])

        frame_img = head_pose_image
        if EVAL:
            headpose_error.append(headpose_err)
            print("head pose error:",headpose_err)

        if args.mode == 'image':
            #show headpose
            # if args.vis_headpose:
            #     plt.axis("off")
            #     plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            #     plt.show()

            if args.save_headpose:
                cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0]+str(people_count) + '_headpose.jpg'), head_pose_image)
        people_count += 1
        # Convert the eye patches to the network's input format (resize etc.).
        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)


    # if args.mode =='video':
    #     # plt.axis("off")
    #     # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
    #     # plt.show()
    #     headpose_out_video.write(frame_img)

    if len(valid_subject_list) == 0:
        return

    # returns [subject : [gaze_pose]]
    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)
    people_count = 1
    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        facebox = faceboxes[subject_id]
        people_idx = get_people(facebox[0])
        # Build visualizations
        # r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        # l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        # if EVAL:
        #     gaze_theta.append(gaze[0])
        #     gaze_phi.append(gaze[1])
        if EVAL:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox,people_list[people_idx], "gaze_r", label)
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox,people_list[people_idx], "gaze_l", label)
        else:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox,people_list[people_idx], "gaze_r")
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox,people_list[people_idx], "gaze_l")

        frame_img = l_gaze_img
        if EVAL:
            print("right gaze error:",r_gaze_err)
            print("left gaze error:",l_gaze_err)
            gaze_error.append(r_gaze_err)
            gaze_error.append(l_gaze_err)

        #show gaze image
        # if args.vis_gaze:
        #     plt.axis("off")
        #     plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
        #     plt.show()
        if args.mode == 'image':
            if args.save_gaze:
                cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0]+str(people_count) + '_gaze.jpg'), frame_img)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)


        if args.save_estimate:
            with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
        people_count += 1
    if args.mode == 'video':
        out_video.write(frame_img)
    end = time.time()
    delay_time = end - start
    FPS = str(int(1 / delay_time))
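
Finally, the FPS shown in the overlay above is computed from a single frame's elapsed time, which is noisy and can divide by zero when the elapsed time rounds to 0. A small sketch of a smoother, guarded estimate over a sliding window (the window length is arbitrary):

import time
from collections import deque

_frame_times = deque(maxlen=30)  # sliding window of per-frame durations


def update_fps(frame_duration):
    """Record one frame's processing time and return a smoothed FPS string."""
    _frame_times.append(max(frame_duration, 1e-6))  # guard against zero
    mean_duration = sum(_frame_times) / len(_frame_times)
    return str(int(round(1.0 / mean_duration)))


# Usage: wrap the per-frame work with a timer, as estimate_gaze does.
start = time.time()
time.sleep(0.02)                    # stand-in for the per-frame processing
FPS = update_fps(time.time() - start)
print(FPS)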