Example #1
def experiment_detect_faces():
    dataset_size = 200
    out_dir = os.path.join(RESULTS_PATH, "manual_face_detection")
    os.makedirs(out_dir, exist_ok=True)

    haarcascade = HaarcascadeFaceDetector()
    hog = HogFaceDetector()

    metadata = _load_metadata()
    paths = get_random_subset_paths(metadata, dataset_size)

    for equal_hist in [False, True]:
        out_subdir = "equal_hist" if equal_hist else "raw_grayscale"
        out_subdir = os.path.join(out_dir, out_subdir)
        os.makedirs(out_subdir, exist_ok=True)

        for i, file_path in tqdm(enumerate(paths), total=len(paths)):
            file_path = own_dataset_path_wrapper(file_path)
            image = load_image_by_cv2(file_path)
            image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

            if equal_hist:
                image_gray = cv2.equalizeHist(image_gray)

            # Haar cascade detections are drawn in red, HOG detections in blue
            # (the image is RGB here, per the COLOR_RGB2GRAY conversion above).
            faces = haarcascade.detect(image_gray)
            visualize_faces(faces, image, color=(255, 0, 0), show=False)

            faces = hog.detect(image_gray)
            visualize_faces(faces, image, color=(0, 0, 255), show=False)

            path = os.path.join(out_subdir, str(i).zfill(4) + ".jpg")
            im = Image.fromarray(image)
            im.save(path)
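visualize_faces is part of the repo and its implementation is not shown in these examples. A minimal sketch of what such a helper could look like, assuming detections arrive as (x, y, w, h) boxes (the box format and the function body are assumptions, not the repo's actual code):

import cv2

def visualize_faces_sketch(faces, image, color=(255, 0, 0), show=False):
    # Draw one rectangle per detected face; `faces` assumed to be (x, y, w, h) boxes.
    for (x, y, w, h) in faces:
        cv2.rectangle(image, (x, y), (x + w, y + h), color, thickness=2)
    if show:
        cv2.imshow("faces", image)
        cv2.waitKey(0)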
Example #2
def parse_mpiigaze_landmark_coords(person_id: int, day: int, img_n: int, eye_image_width=60, eye_image_height=36):
    face_model = load_face_model()

    person_id_str = str(person_id).zfill(2)
    day_str = str(day).zfill(2)
    img_n_str = str(img_n).zfill(4)
    im = load_image_by_cv2(mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/day{day_str}/{img_n_str}.jpg"))
    ann_path = mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/day{day_str}/annotation.txt")
    annotation = load_annotation(ann_path)
    camera_matrix = load_camera_matrix(path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")

    im_height, im_width, _ = im.shape

    headpose_hr = np.reshape(annotation[img_n - 1, 29:32], (1, -1))
    headpose_ht = np.reshape(annotation[img_n - 1, 32:35], (1, -1))
    h_r, _ = cv2.Rodrigues(headpose_hr)
    fc = np.dot(h_r, face_model)
    fc = fc + np.reshape(headpose_ht, (-1, 1))

    gaze_target = annotation[img_n - 1, 26:29]
    gaze_target = np.reshape(gaze_target, (-1, 1))

    right_eye_center = 0.5 * (fc[:, 0] + fc[:, 1])
    left_eye_center = 0.5 * (fc[:, 2] + fc[:, 3])

    right_eye = get_img_gaze_headpose_per_eye(im, right_eye_center, h_r, gaze_target,
                                              eye_image_width, eye_image_height, camera_matrix)
    left_eye = get_img_gaze_headpose_per_eye(im, left_eye_center, h_r, gaze_target,
                                             eye_image_width, eye_image_height, camera_matrix)

    landmarks = np.reshape(annotation[img_n - 1, 0:24], (1, -1))
    landmarks = norm_landmarks(landmarks, height=im_height, width=im_width)
    coordinates = np.array([annotation[img_n - 1, 25], annotation[img_n - 1, 24]])  # y, x

    return right_eye, left_eye, landmarks, coordinates
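The head-pose block above is the standard MPIIGaze normalization geometry: cv2.Rodrigues turns the axis-angle head pose into a rotation matrix, the generic 3D face model is rotated and translated into camera coordinates, and each eye center is the midpoint of its two eye-corner columns. A minimal, self-contained sketch of that geometry (the 4-point face model and the pose values below are illustrative, not the output of load_face_model):

import cv2
import numpy as np

# Hypothetical 3x4 face model: columns are 3D points
# (right-eye outer/inner corner, left-eye inner/outer corner).
face_model = np.array([[-45.0, -20.0, 20.0, 45.0],
                       [ -5.0,  -5.0, -5.0, -5.0],
                       [  5.0,   5.0,  5.0,  5.0]])

headpose_hr = np.array([0.1, -0.2, 0.05]).reshape(3, 1)   # axis-angle rotation (illustrative)
headpose_ht = np.array([10.0, 5.0, 500.0]).reshape(3, 1)  # translation (illustrative)

h_r, _ = cv2.Rodrigues(headpose_hr)   # 3x3 rotation matrix
fc = h_r @ face_model + headpose_ht   # face model in camera coordinates

right_eye_center = 0.5 * (fc[:, 0] + fc[:, 1])  # midpoint of right-eye corners
left_eye_center = 0.5 * (fc[:, 2] + fc[:, 3])   # midpoint of left-eye corners
print(right_eye_center, left_eye_center)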
Example #3
def load_mpii_face_gaze_image(person_id: int, person_image_path: str, verbose=False):
    person_str = "p" + str(person_id).zfill(2)
    full_file_path = os.path.join(MPII_FACE_GAZE_PATH, person_str, person_image_path)
    if os.path.exists(full_file_path):
        im = load_image_by_cv2(full_file_path)
    else:
        if verbose:
            print(f"Image file {full_file_path} does not exist.")
        im = None
    return im
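A hypothetical call (the person id and the relative image path below are illustrative, not taken from the dataset):

im = load_mpii_face_gaze_image(person_id=3, person_image_path="day01/0001.jpg", verbose=True)
if im is not None:
    print(im.shape)  # (height, width, channels)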
Example #4
def _prepare_data_for_calibration(pipeline, dir_name):
    dir_path = os.path.join(DATA_PATH, "application", "calibration", dir_name)
    metadata = _load_metadata(dir_path)
    coords_list = list()
    predictions = list()
    for row in tqdm(metadata):
        file_path = _get_file_path(dir_path, row)
        coords = _get_coords(row)  # x, y
        coords = (coords[1], coords[0])  # y, x

        if not os.path.exists(file_path):
            continue
        im = load_image_by_cv2(file_path)

        if im is None:
            continue

        prediction = pipeline.predict(im)
        predictions.append(prediction.squeeze())
        coords_list.append(coords)
    return np.asarray(coords_list), np.asarray(predictions)
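The function above only collects (ground-truth coordinate, raw prediction) pairs; what the calibration itself does is not shown in these examples. One plausible downstream use, sketched under the assumption that both arrays hold (y, x) screen coordinates, is a per-axis least-squares linear correction (fit_linear_calibration and apply_linear_calibration are hypothetical helpers):

import numpy as np

def fit_linear_calibration(coords, predictions):
    # Fit coord = a * prediction + b per axis by least squares.
    # coords, predictions: shape (n_samples, 2), both in (y, x) order.
    return [np.polyfit(predictions[:, axis], coords[:, axis], deg=1)
            for axis in range(2)]

def apply_linear_calibration(params, prediction):
    # prediction: length-2 array in (y, x) order.
    return np.array([a * p + b for (a, b), p in zip(params, prediction)])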
Example #5
def process_mpii_face_gaze():
    mean_camera_matrix = False

    right_images = list()
    left_images = list()
    poses = list()
    coords = list()
    people_ids = list()

    eye_image_width = 60
    eye_image_height = 36

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"Person id: {person_id_str}")

        if mean_camera_matrix:
            camera_matrix = get_avg_camera_matrix()
        else:
            camera_matrix = load_camera_matrix(
                path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
        ann_path = mpii_face_gaze_path_wrapper(
            f"p{person_id_str}/p{person_id_str}.txt")
        annotation = np.loadtxt(ann_path, dtype=str)
        if len(annotation.shape) == 1:
            annotation = np.reshape(annotation, (1, -1))

        face_detector = HogFaceDetector()
        landmarks_detector = KazemiLandmarksDetector()
        model = load_best_modal3_conv_net()
        pipeline = Pipeline(gaze_estimation_model=model,
                            face_detector=face_detector,
                            landmarks_detector=landmarks_detector,
                            eye_image_width=eye_image_width,
                            eye_image_height=eye_image_height,
                            camera_matrix=camera_matrix,
                            screen_size=screen_size)

        file_paths = annotation[:, 0]

        for i, file_path in tqdm(enumerate(file_paths), total=len(file_paths)):
            im = load_image_by_cv2(
                mpii_face_gaze_path_wrapper(f"p{person_id_str}/{file_path}"))
            data = pipeline.process(im)
            if data is None:
                continue
            right_images.append(data["right_image"])
            left_images.append(data["left_image"])
            poses.append(data["pose"])

            # coords
            coords_row = annotation[i, 1:3].astype(float)  # x, y
            coords_row = [coords_row[1], coords_row[0]]  # y, x
            coords_row = [
                coords_row[0] / screen_size[0], coords_row[1] / screen_size[1]
            ]
            coords.append(coords_row)

            people_ids.append(person_id)

        # Save the cumulative lists after each person is processed.
        save_data_to_file(right_images, left_images, poses, coords, people_ids)
Example #6
import os

import numpy as np
from PIL import Image

from data_processing.utils import load_image_by_cv2, mpii_face_gaze_path_wrapper
from settings import FOR_THESIS_DIR
from utils.landmarks import visualize_landmarks_mpii_gaze_format

if __name__ == "__main__":
    """
    This script draws all landmarks from the MPII Gaze dataset onto a photo, to identify which landmark is which.
    """
    person_id_str = "14"
    day = "day04"
    im_file = "0008.jpg"
    offset = 740
    im = load_image_by_cv2(
        mpii_face_gaze_path_wrapper(f"p{person_id_str}/{day}/{im_file}"))

    annotation = np.loadtxt(
        mpii_face_gaze_path_wrapper(f"p{person_id_str}/p{person_id_str}.txt"),
        dtype=str)
    print(f"Real file path: {annotation[offset + 8 - 1, 0]}")
    landmarks = np.reshape(annotation[offset + 8 - 1, 3:15], (1, -1, 2))
    print(landmarks)
    landmarks = landmarks.astype(int)  # np.int was removed from NumPy

    visualize_landmarks_mpii_gaze_format(landmarks, im, numbers=True)

    im = Image.fromarray(im)
    im.save(os.path.join(FOR_THESIS_DIR, "eye_landmarks.png"))
Example #7
def create_own_dataset(dir_name: str):
    # screen resolution
    screen_size = _load_screen_resolution(dir_name)
    # screen_size = (768, 1366)

    # counters
    counter = 0  # count valid records
    broken_path_counter = 0
    no_face_detected_counter = 0

    # data to save
    right_image_list = list()
    left_image_list = list()
    pose_list = list()
    coords_list = list()

    # pipeline
    pipeline = create_pipeline(screen_size=screen_size)
    # pipeline_haarcascade_lbf = create_pipeline(face_detector="haarcascade", landmarks_detector="lbf")
    metadata = _load_metadata(dir_name)

    # time
    time_sum = 0

    for row in tqdm(metadata):
        file_path = _get_file_path(dir_name, row)
        coords = _get_coords(row)  # x, y
        coords = (coords[1], coords[0])  # y, x

        if not os.path.exists(file_path):
            continue
        im = load_image_by_cv2(file_path)

        if im is None:
            broken_path_counter += 1
            continue

        start_time = time()

        data = pipeline.process(im)
        if data is None:
            no_face_detected_counter += 1
            continue

        elapsed = time() - start_time
        time_sum += elapsed

        counter += 1
        right_image_list.append(data["right_image"])
        left_image_list.append(data["left_image"])
        pose_list.append(data["pose"])
        coords_list.append(coords)

    print(f"Valid records: {counter}")
    print(f"Broken paths: {broken_path_counter}")
    print(f"No face detected: {no_face_detected_counter}")
    print(f"Average time: {time_sum / counter}")

    data = {
        "right_image": np.concatenate(right_image_list),
        "left_image": np.concatenate(left_image_list),
        "pose_landmarks": np.concatenate(pose_list),
        "coordinates": np.asarray(coords_list)
    }
    data["coordinates"] = norm_coords(data["coordinates"], screen_size)

    _save_own_dataset(data)
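norm_coords itself does not appear in these examples; given the (y, x) ordering above and the norm_x_coordinate / norm_y_coordinate fields named in the docstrings, a plausible reading is an element-wise division by the (height, width) screen size, mapping pixel positions into [0, 1]:

import numpy as np

def norm_coords(coords, screen_size):
    # Assumed behavior: coords is (n, 2) in (y, x) pixels and
    # screen_size is (height, width); the result lies in [0, 1].
    return np.asarray(coords, dtype=float) / np.asarray(screen_size, dtype=float)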
def create_dataset_mpiigaze_processed_both_rgb_full_transformation(
        dataset_name):
    create_dirs(dataset_name)

    face_model = load_face_model()
    eye_image_width = 60
    eye_image_height = 36

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"--------------\nperson_id_str: {person_id_str}")
        camera_matrix = load_camera_matrix(
            path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
        print(screen_size)
        day_dirs = get_all_days(
            path=mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/"))

        for day in day_dirs:
            left_eyes = list()
            right_eyes = list()
            headposes = list()
            gazes = list()
            coordinates = list()

            print(day)
            ann_path = mpiigaze_path_wrapper(
                f"Data/Original/p{person_id_str}/{day}/annotation.txt")
            annotation = load_annotation(ann_path)
            im_filenames = get_all_jpg_files(
                mpiigaze_path_wrapper(
                    f"Data/Original/p{person_id_str}/{day}/"))
            for i in tqdm(range(len(im_filenames))):
                im_file = im_filenames[i]
                im = load_image_by_cv2(
                    mpiigaze_path_wrapper(
                        f"Data/Original/p{person_id_str}/{day}/{im_file}"))

                headpose_hr = np.reshape(annotation[i, 29:32], (1, -1))
                headpose_ht = np.reshape(annotation[i, 32:35], (1, -1))
                h_r, _ = cv2.Rodrigues(headpose_hr)
                fc = np.dot(h_r, face_model)
                fc = fc + np.reshape(headpose_ht, (-1, 1))

                gaze_target = annotation[i, 26:29]
                gaze_target = np.reshape(gaze_target, (-1, 1))

                right_eye_center = 0.5 * (fc[:, 0] + fc[:, 1])
                left_eye_center = 0.5 * (fc[:, 2] + fc[:, 3])

                right_eye_img, right_eye_headpose, right_eye_gaze = mpii_gaze_normalize_image(
                    im, right_eye_center, h_r, gaze_target,
                    (eye_image_width, eye_image_height), camera_matrix)
                left_eye_img, left_eye_headpose, left_eye_gaze = mpii_gaze_normalize_image(
                    im, left_eye_center, h_r, gaze_target,
                    (eye_image_width, eye_image_height), camera_matrix)

                right_eyes.append(right_eye_img)
                left_eyes.append(left_eye_img)

                headpose_angles = list()
                headpose_angles.extend(
                    count_headpose_angles(right_eye_headpose /
                                          np.linalg.norm(right_eye_headpose)))
                headpose_angles.extend(
                    count_headpose_angles(left_eye_headpose /
                                          np.linalg.norm(left_eye_headpose)))
                headpose_angles.extend(
                    count_headpose_angles(headpose_hr /
                                          np.linalg.norm(headpose_hr)))

                headposes.append(np.array(headpose_angles))
                gazes.append(
                    count_gaze_angles(gaze_target /
                                      np.linalg.norm(gaze_target)))
                coordinates.append([annotation[i, 25], annotation[i, 24]])  # y, x

            coordinates = norm_coords(coordinates, screen_size)
            # save_coords(person_id_str, day, coordinates)
            save_dataset_mpiigaze_processed_both_rgb(person_id_str,
                                                     day,
                                                     right_eyes,
                                                     left_eyes,
                                                     headposes,
                                                     gazes,
                                                     coordinates,
                                                     dataset_name=dataset_name)
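count_headpose_angles and count_gaze_angles collapse a unit 3D vector into two angles. A sketch of the gaze case, assuming the usual MPIIGaze convention of theta = arcsin(-y) for pitch and phi = arctan2(-x, -z) for yaw (the repo's actual implementation is not shown in these examples):

import numpy as np

def count_gaze_angles_sketch(gaze_vector):
    # gaze_vector: unit-norm 3D vector (x, y, z) in camera coordinates.
    x, y, z = np.asarray(gaze_vector).flatten()
    theta = np.arcsin(-y)      # vertical angle (pitch)
    phi = np.arctan2(-x, -z)   # horizontal angle (yaw)
    return np.array([theta, phi])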
def create_dataset_mpiigaze_processed_both_rgb(dataset_name, headpose_type):
    """
    This function creates dataset with following record structure:
    (right_eye_rgb_img, left_eye_rgb_img, gaze_target_theta, gaze_target_phi, norm_x_coordinate, norm_y_coordinate).
    Records are saved to pickles. One pickle for one person_id_str.
    :return:
    """
    create_dirs(dataset_name)

    face_model = load_face_model()
    eye_image_width = 60
    eye_image_height = 36

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"--------------\nperson_id_str: {person_id_str}")
        camera_matrix = load_camera_matrix(
            path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
        print(screen_size)
        day_dirs = get_all_days(
            path=mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/"))

        for day in day_dirs:
            left_eyes = list()
            right_eyes = list()
            headposes = list()
            gazes = list()
            coordinates = list()

            print(day)
            ann_path = mpiigaze_path_wrapper(
                f"Data/Original/p{person_id_str}/{day}/annotation.txt")
            annotation = load_annotation(ann_path)
            im_filenames = get_all_jpg_files(
                mpiigaze_path_wrapper(
                    f"Data/Original/p{person_id_str}/{day}/"))
            for i in tqdm(range(len(im_filenames))):
                im_file = im_filenames[i]
                im = load_image_by_cv2(
                    mpiigaze_path_wrapper(
                        f"Data/Original/p{person_id_str}/{day}/{im_file}"))

                headpose_hr = np.reshape(annotation[i, 29:32], (1, -1))
                headpose_ht = np.reshape(annotation[i, 32:35], (1, -1))
                h_r, _ = cv2.Rodrigues(headpose_hr)
                fc = np.dot(h_r, face_model)
                fc = fc + np.reshape(headpose_ht, (-1, 1))

                gaze_target = annotation[i, 26:29]
                gaze_target = np.reshape(gaze_target, (-1, 1))

                right_eye_center = 0.5 * (fc[:, 0] + fc[:, 1])
                left_eye_center = 0.5 * (fc[:, 2] + fc[:, 3])

                right_eye_img = cut_eye(im, right_eye_center, h_r,
                                        (eye_image_width, eye_image_height),
                                        camera_matrix)
                left_eye_img = cut_eye(im, left_eye_center, h_r,
                                       (eye_image_width, eye_image_height),
                                       camera_matrix)

                right_eyes.append(right_eye_img)
                left_eyes.append(left_eye_img)
                if headpose_type == "2_3_dim_vectors":
                    headposes.append(
                        np.concatenate((headpose_hr, headpose_ht),
                                       axis=1).squeeze())
                elif headpose_type == "2_angles":
                    headposes.append(
                        count_headpose_angles(headpose_hr /
                                              np.linalg.norm(headpose_hr)))
                gazes.append(
                    count_gaze_angles(gaze_target /
                                      np.linalg.norm(gaze_target)))
                coordinates.append([annotation[i, 25], annotation[i, 24]])  # y, x

            coordinates = norm_coords(coordinates, screen_size)
            # save_coords(person_id_str, day, coordinates)
            save_dataset_mpiigaze_processed_both_rgb(person_id_str,
                                                     day,
                                                     right_eyes,
                                                     left_eyes,
                                                     headposes,
                                                     gazes,
                                                     coordinates,
                                                     dataset_name=dataset_name)
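Reading one of the produced pickles back is then symmetric to the save call. A minimal sketch, with the file path and tuple layout assumed from the docstring above (the real file naming lives inside save_dataset_mpiigaze_processed_both_rgb, which is not shown here):

import pickle

# Hypothetical path; the actual name is chosen by save_dataset_mpiigaze_processed_both_rgb.
with open("mpiigaze_processed_both_rgb/p00.pkl", "rb") as f:
    records = pickle.load(f)

right_eye_img, left_eye_img, headpose, gaze_angles, norm_yx = records[0]
print(right_eye_img.shape)  # e.g. (36, 60, 3)
print(gaze_angles)          # (theta, phi)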