Example #1
    def image_callback(self, subject_image_list):
        """This method is called whenever new input arrives. The input is first converted into a format suitable
        for the gaze estimation network (see :meth:`input_from_image`), then the gaze is estimated (see
        :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image (see :meth:`visualize_eye_result`),
        and this image is published along with the estimated gaze vector (see :meth:`publish_image` and
        :func:`publish_gaze`)."""
        timestamp = subject_image_list.header.stamp

        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        input_r_list = []
        input_l_list = []
        input_head_list = []
        valid_subject_list = []
        for subject_id, s in subjects_dict.items():
            try:
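                # Transform from this subject's head pose frame into the target tf frame at the image timestamp
                # (raises one of the exceptions below if the transform is not yet available)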
                transform_msg = self.tf2_buffer.lookup_transform(self.ros_tf_frame, self.headpose_frame + str(subject_id), timestamp)
                rot_head = transform_msg.transform.rotation
                euler_angles_head = list(transformations.euler_from_quaternion([rot_head.x, rot_head.y, rot_head.z, rot_head.w]))
                euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

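                # The gaze network takes the head pose as spherical angles; note the input order is [theta, phi]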
                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                input_head_list.append([theta_head, phi_head])
                input_r_list.append(self.input_from_image(s.right))
                input_l_list.append(self.input_from_image(s.left))
                valid_subject_list.append(subject_id)
            except (tf2_ros.LookupException, tf2_ros.ConnectivityException, tf2_ros.ExtrapolationException, tf2_ros.TransformException):
                pass

        if len(valid_subject_list) == 0:
            return

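        # Batched inference over all valid subjects: left/right eye patches plus head pose angles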
        gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                              inference_input_right_list=input_r_list,
                                              inference_headpose_list=input_head_list)

        subjects_gaze_img_list = []
        for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
            self.publish_gaze(gaze, timestamp, subject_id)

            if self.visualise_eyepose:
                s = subjects_dict[subject_id]
                r_gaze_img = self.visualize_eye_result(s.right, gaze)
                l_gaze_img = self.visualize_eye_result(s.left, gaze)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                subjects_gaze_img_list.append(s_gaze_img)

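        # Publish one image with every subject's eye-pair visualisation stacked horizontally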
        if len(subjects_gaze_img_list) > 0:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
            gaze_img_msg.header.stamp = timestamp
            self.subjects_gaze_img.publish(gaze_img_msg)

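        # Book-keeping for the status line below: processing frequency and end-to-end message latency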
        _now = rospy.Time().now()
        _freq = 1.0 / (_now - self._last_time).to_sec()
        self._freq_deque.append(_freq)
        self._latency_deque.append(_now.to_sec() - timestamp.to_sec())
        self._last_time = _now
        tqdm.write(
            '\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} latency: {:.2f}s for {} subject(s) {:.0f}Hz\033[0m'.format(
                (_now.to_sec()), timestamp.to_sec(), np.mean(self._latency_deque), len(valid_subject_list), np.mean(self._freq_deque)), end="\r")

    def process_image(self, color_msg):
        color_img = gaze_tools.convert_image(color_msg, "bgr8")
        timestamp = color_msg.header.stamp

        self.update_subject_tracker(color_img)

        if not self.subject_tracker.get_tracked_elements():
            tqdm.write("\033[2K\033[1;31mNo face found\033[0m", end="\r")
            return

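        # Extract eye image patches for every tracked subject at the configured size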
        self.subject_tracker.update_eye_images(self.eye_image_size)

        final_head_pose_images = []
        for subject_id, subject in self.subject_tracker.get_tracked_elements().items():
            if subject.left_eye_color is None or subject.right_eye_color is None:
                continue
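            # Lazily create six scalar stabilizers (one per head pose component) to smooth the pose over time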
            if subject_id not in self.pose_stabilizers:
                self.pose_stabilizers[subject_id] = [
                    Stabilizer(state_num=2,
                               measure_num=1,
                               cov_process=0.1,
                               cov_measure=0.1) for _ in range(6)
                ]

            success, head_rpy, translation_vector = self.get_head_pose(
                subject.marks, subject_id)

            if success:
                # Publish all the data
                self.publish_pose(timestamp, translation_vector, head_rpy,
                                  subject_id)

                if self.visualise_headpose:
                    roll_pitch_yaw = gaze_tools.limit_yaw(head_rpy)
                    face_image_resized = cv2.resize(
                        subject.face_color,
                        dsize=(224, 224),
                        interpolation=cv2.INTER_CUBIC)

                    final_head_pose_images.append(
                        LandmarkMethodROS.visualize_headpose_result(
                            face_image_resized,
                            gaze_tools.get_phi_theta_from_euler(
                                roll_pitch_yaw)))

        if len(self.subject_tracker.get_tracked_elements().items()) > 0:
            self.publish_subject_list(
                timestamp, self.subject_tracker.get_tracked_elements())

        if len(final_head_pose_images) > 0:
            headpose_image_ros = self.bridge.cv2_to_imgmsg(
                np.hstack(final_head_pose_images), "bgr8")
            headpose_image_ros.header.stamp = timestamp
            self.subject_faces_pub.publish(headpose_image_ros)
Example #3
    def image_callback(self, subject_image_list):
        """This method is called whenever new input arrives. The input is first converted into a format suitable
        for the gaze estimation network (see :meth:`input_from_image`), then the gaze is estimated (see
        :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image (see :meth:`visualize_eye_result`),
        and this image is published along with the estimated gaze vector (see :meth:`publish_image` and
        :func:`publish_gaze`)."""
        timestamp = subject_image_list.header.stamp

        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        input_r_list = []
        input_l_list = []
        input_head_list = []
        valid_subject_list = []
        for subject_id, s in subjects_dict.items():
            try:
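                # Older tf API: lookupTransform returns the translation and quaternion as a tuple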
                (trans_head, rot_head) = self.tf_listener.lookupTransform(self.ros_tf_frame, self.headpose_frame + str(subject_id), timestamp)
                euler_angles_head = list(tf.transformations.euler_from_quaternion(rot_head))
                euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                input_head_list.append([theta_head, phi_head])
                input_r_list.append(self.input_from_image(s.right))
                input_l_list.append(self.input_from_image(s.left))
                valid_subject_list.append(subject_id)
            except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException, tf.Exception):
                pass

        if len(valid_subject_list) == 0:
            return

        gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                              inference_input_right_list=input_r_list,
                                              inference_headpose_list=input_head_list)

        subjects_gaze_img_list = []
        for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
            self.publish_gaze(gaze, timestamp, subject_id)

            if self.visualise_eyepose:
                s = subjects_dict[subject_id]
                r_gaze_img = self.visualize_eye_result(s.right, gaze)
                l_gaze_img = self.visualize_eye_result(s.left, gaze)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                subjects_gaze_img_list.append(s_gaze_img)

        if len(subjects_gaze_img_list) > 0:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
            gaze_img_msg.header.stamp = timestamp
            self.subjects_gaze_img.publish(gaze_img_msg)
Example #4
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(
        color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(
            landmark_estimator.model_points,
            subject.landmarks.reshape(len(subject.landmarks), 1, 2),
            cameraMatrix=camera_matrix,
            distCoeffs=dist_coefficients,
            flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write(
                'Not able to extract head pose for subject {}'.format(idx))
            continue

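        # cv2.Rodrigues turns the axis-angle rotation vector into a 3x3 rotation matrix;
        # the extra matmul permutes the axes to the convention expected below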
        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(
            _rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0], [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

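        # Overlay the estimated head pose on a 224x224 crop of the face for visualisation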
        face_image_resized = cv2.resize(subject.face_color,
                                        dsize=(224, 224),
                                        interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(
            face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(
                os.path.join(args.output_path,
                             os.path.splitext(base_name)[0] + '_headpose.jpg'),
                head_pose_image)

        input_r_list.append(
            gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(
            gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(
        inference_input_left_list=input_l_list,
        inference_input_right_list=input_r_list,
        inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list,
                                          gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(
            subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(
            subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(
                os.path.join(args.output_path,
                             os.path.splitext(base_name)[0] + '_gaze.jpg'),
                s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args.save_estimate:
            with open(
                    os.path.join(
                        args.output_path,
                        os.path.splitext(base_name)[0] + '_output.txt'),
                    'w+') as f:
                f.write(
                    os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) +
                    ', ' + str(headpose[0]) + ']' + ', [' + str(gaze[1]) +
                    ', ' + str(gaze[0]) + ']' + '\n')


def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.marks.reshape(len(subject.marks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

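        # Derive roll/pitch/yaw directly from the solvePnP rotation-vector components
        # (sign flips plus a pi offset) instead of converting through a rotation matrix as in the variant above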
        roll_pitch_yaw = [-rotation_vector[2], -rotation_vector[0], rotation_vector[1] + np.pi]
        roll_pitch_yaw = limit_yaw(np.array(roll_pitch_yaw).flatten().tolist())

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_headpose.jpg'), head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
Example #6
                ######
                if args.eye_gaze == 1:
                    _rotation_matrix = np.matmul(rmat, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
                    _m = np.zeros((4, 4))
                    _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
                    _m[:3, :3] = _rotation_matrix
                    _m[3, 3] = 1
                    # Go from camera space to ROS space
                    _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                                      [-1.0, 0.0, 0.0, 0.0],
                                      [0.0, -1.0, 0.0, 0.0],
                                      [0.0, 0.0, 0.0, 1.0]]

                    roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
                    roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

                    phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)


                    # le_c, re_c, _, _ = get_eye_image_from_landmarks(f.bbox, get_image_from_bb(frame, f.bbox), f.lms, (60,36))
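                    # Crop each eye region from the frame using the six contour landmarks around that eye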
                    re_c = get_eye_area(eye_gaze_frame, [f.lms[36], f.lms[37], f.lms[38], f.lms[39], f.lms[40], f.lms[41]])
                    le_c = get_eye_area(eye_gaze_frame, [f.lms[42], f.lms[43], f.lms[44], f.lms[45], f.lms[46], f.lms[47]])

                    r_eye_roi_resize.append(input_from_image(re_c))
                    l_eye_roi_resize.append(input_from_image(le_c))
                    head_list.append([phi_head, theta_head])

                #########
                frame_count += 1
                if log is not None:
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix, label):
    global FPS


    cv2.putText(color_img, "FPS : " + FPS, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    start = time.time()
    # Returns the face box positions for all subjects as a list: [left_x, top_y, right_x, bottom_y]
    faceboxes = landmark_estimator.get_face_bb(color_img)

    # Draw a bounding box around each detected face
    for facebox in faceboxes:
        left = int(facebox[0])
        top = int(facebox[1])
        right = int(facebox[2])
        bottom = int(facebox[3])
        cv2.rectangle(color_img, (left, top), (right, bottom), (0, 255, 0), 2)


    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        # if EVAL and not SHIFT:
            # head_phi.append(-100)
            # head_theta.append(-100)
            # gaze_phi.append(-100)
            # gaze_theta.append(-100)
        return

    # check_people(faceboxes)

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    people_count = 1
    frame_img = color_img

    for idx, subject in enumerate(subjects):
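        # Match this facebox to a tracked person (keyed by its left x-coordinate) and update the stored left coordinate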
        people_idx = get_people(faceboxes[idx][0])
        people_list[people_idx].set_bbox_l(faceboxes[idx][0])

        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)
        # if EVAL:
            # head_phi.append(phi_head)
            # head_theta.append(theta_head)
        # face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        # head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        # Draw the head pose vector onto the facebox of color_image
        if EVAL:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx], label)
        else:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx])

        frame_img = head_pose_image
        if EVAL:
            headpose_error.append(headpose_err)
            print("head pose error:", headpose_err)

        if args.mode == 'image':
            # show headpose
            # if args.vis_headpose:
            #     plt.axis("off")
            #     plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            #     plt.show()

            if args.save_headpose:
                cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + str(people_count) + '_headpose.jpg'), head_pose_image)
        people_count += 1
        # Resize and reformat the eye images for the network input
        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)


    # if args.mode =='video':
    #     # plt.axis("off")
    #     # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
    #     # plt.show()
    #     headpose_out_video.write(frame_img)

    if len(valid_subject_list) == 0:
        return

    # returns [subject : [gaze_pose]]
    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)
    people_count = 1
    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        facebox = faceboxes[subject_id]
        people_idx = get_people(facebox[0])
        # Build visualizations
        # r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        # l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        # if EVAL:
        #     gaze_theta.append(gaze[0])
        #     gaze_phi.append(gaze[1])
        if EVAL:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r", label)
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l", label)
        else:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r")
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l")

        frame_img = l_gaze_img
        if EVAL:
            print("right gaze error:", r_gaze_err)
            print("left gaze error:", l_gaze_err)
            gaze_error.append(r_gaze_err)
            gaze_error.append(l_gaze_err)

        #show gaze image
        # if args.vis_gaze:
        #     plt.axis("off")
        #     plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
        #     plt.show()
        if args.mode == 'image':
            if args.save_gaze:
                cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + str(people_count) + '_gaze.jpg'), frame_img)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)


        if args.save_estimate:
            with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
        people_count += 1
    if args.mode == 'video':
        out_video.write(frame_img)
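    # Measure this frame's processing time; the resulting FPS string is drawn at the top of the next frame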
    end = time.time()
    delay_time = end - start
    FPS = str(int(1 / delay_time))