Example #1
def live_demo(data):
    color_space = data['color_space']
    model_type = data['model_type']
    device_name = data['device_name']
    model_format = data['model_format']
    model_path = data['model_path']

    mode = 'rc'
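    # 'pc' enables perspective correction before landmark detection;
    # any other value (like 'rc' here) leaves the webcam frame as-is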
    print('DEVICE NAME : ', device_name)

    # initialize inference engine - torch or onnx
    inferenceEngine = InferenceEngine(model_format, color_space, model_type,
                                      model_path)

    # get screen monitor and video capture stream
    monitor = get_monitors()[0]
    cap = cv2.VideoCapture(0)

    # transform to normalize images
    normalize_image = normalize_image_transform(image_size=IMAGE_SIZE,
                                                split='test',
                                                jitter=False,
                                                color_space=color_space)

    # initial target index - cycled with the space bar below
    target = 0

    screenOffsetX = 0
    screenOffsetY = 0

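    # main loop: grab a frame, detect landmarks, run inference, render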
    while True:
        # read a new frame
        _, webcam_image = cap.read()
        # create a display object
        display = np.zeros(
            (monitor.height - screenOffsetY, monitor.width - screenOffsetX, 3),
            dtype=np.uint8)

        # find face landmarks/keypoints
        shape_np, isValid = find_face_dlib(webcam_image)
        if mode == "pc":
            webcam_image, anchor_indices = perspectiveCorrection(
                webcam_image, shape_np)
            shape_np, isValid = find_face_dlib(webcam_image)
        else:
            anchor_indices = range(68)

        # basic display
        live_image = webcam_image.copy()
        if isValid:
            draw_landmarks(live_image, shape_np, anchor_indices)
            # live_image = draw_landmarks2(live_image, shape_np, anchor_indices)
            # delaunay_correction(live_image, shape_np, delaunay_color=(255, 255, 255))
            # draw_delaunay(live_image, shape_np, delaunay_color=(255, 255, 255))
            # draw_outline(live_image, shape_np, color=(255, 255, 255))
        live_image = Image.fromarray(live_image)
        live_image = transforms.functional.hflip(live_image)
        live_image = transforms.functional.resize(
            live_image, (monitor.height, monitor.width),
            interpolation=2)  # 2 == PIL.Image.BILINEAR
        live_image = transforms.functional.adjust_brightness(live_image, 0.4)
        live_image = np.asarray(live_image)
        generate_baseline_display_data(display, screenOffsetX, screenOffsetY,
                                       monitor, live_image)

        # do only for valid face objects
        if isValid:
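            # NOTE: the hard-coded True always takes the grid_* pipeline;
            # the rc_* branch in the else below is currently unreachable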
            if True:
                face_rect, left_eye_rect, right_eye_rect, isValid = rc_landmarksToRects(
                    shape_np, isValid)
                face_image, left_eye_image, right_eye_image, face_grid_image = grid_generate_face_eye_images(
                    face_rect, left_eye_rect, right_eye_rect, webcam_image)
                # print(face_image.shape, face_grid_image.shape)
                # OpenCV BGR -> PIL RGB conversion
                image_eye_left, image_eye_right, image_face, image_face_grid = grid_prepare_image_inputs(
                    face_image, left_eye_image, right_eye_image,
                    face_grid_image)
                # print(face_grid_image.size, face_grid_image.size)
                # PIL RGB -> PIL YCbCr, then convert images into tensors
                imEyeL, imEyeR, imFace, imFaceGrid = grid_prepare_image_tensors(
                    color_space, image_face, image_eye_left, image_eye_right,
                    image_face_grid, normalize_image)
                start_time = datetime.now()
                gaze_prediction_np = inferenceEngine.run_inference(
                    normalize_image, imFace, imEyeL, imEyeR, imFaceGrid)
            else:
                face_rect, left_eye_rect, right_eye_rect, isValid = rc_landmarksToRects(
                    shape_np, isValid)
                face_image, left_eye_image, right_eye_image, face_grid, face_grid_image = rc_generate_face_eye_images(
                    face_rect, left_eye_rect, right_eye_rect, webcam_image)

                # OpenCV BGR -> PIL RGB conversion
                image_eye_left, image_eye_right, image_face = prepare_image_inputs(
                    face_image, left_eye_image, right_eye_image)

                # PIL RGB -> PIL YCbCr, then convert images into tensors
                imEyeL, imEyeR, imFace, imFaceGrid = prepare_image_tensors(
                    color_space, image_face, image_eye_left, image_eye_right,
                    face_grid, normalize_image)
                start_time = datetime.now()
                gaze_prediction_np = inferenceEngine.run_inference(
                    normalize_image, imFace, imEyeL, imEyeR, imFaceGrid)
            time_elapsed = datetime.now() - start_time

            display = generate_display_data(display, face_grid_image,
                                            face_image, gaze_prediction_np,
                                            left_eye_image, monitor,
                                            right_eye_image, time_elapsed,
                                            target, device_name)

        # show default or updated display object on the screen
        cv2.imshow("display", display)

        # keystroke detection
        k = cv2.waitKey(5) & 0xFF
        # d=100, g=103, m=109
        if k == 27:  # ESC
            break
        if k == 32:  # Space
            target = (target + 1) % len(TARGETS)
        # if k == 100: # d
        #     delauny = ~delauny
        # if k == 103: # g
        #     grid = ~grid
        # if k == 109: # m
        #     mask = ~mask
        # if k == 108: # l
        #     landmarks = ~landmarks

    cv2.destroyAllWindows()
    cap.release()
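
A minimal invocation sketch (not part of the original listing): live_demo reads exactly the five dict keys unpacked at the top of the function. Every value below is a placeholder, not a value taken from the source.

if __name__ == "__main__":
    live_demo({
        'color_space': 'YCbCr',  # placeholder; matches the RGB -> YCbCr step above
        'model_type': 'resNet',  # placeholder; whatever InferenceEngine expects
        'device_name': 'iPhone 6',  # placeholder device SKU
        'model_format': 'torch',  # 'torch' or 'onnx' per the comment above
        'model_path': 'checkpoints/model.pth',  # placeholder path
    })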
Example #2
def main():
    args = parse_arguments()

    data_directory = args.data_path

    if data_directory is None:
        print(
            "Error: must specify --data_path, like /data/EyeCapture/200407")
        return

    directories = sorted(findCaptureSessionDirs(data_directory))
    total_directories = len(directories)

    # print(f"Found {total_directories} directories")

    multi_progress_bar = MultiProgressBar(max_value=total_directories,
                                          boundary=True)

    for directory_idx, directory in enumerate(directories):
        captures = sorted(findCapturesInSession(
            os.path.join(data_directory, directory)),
                          key=str)
        total_captures = len(captures)

        info_data = loadJsonData(
            os.path.join(data_directory, directory, "info.json"))
        if not isSupportedDevice(info_data["DeviceName"]):
            # If the device is not supported in device_metrics_sku.json skip it
            # print('%s, %s, %s'%(directory_idx, directory, 'Unsupported SKU'))
            multi_progress_bar.addSubProcess(index=directory_idx, max_value=0)
            continue

        screen_data = loadJsonData(
            os.path.join(data_directory, directory, "screen.json"))

        # dotinfo.json - { "DotNum": [ 0, 0, ... ],
        #                  "XPts": [ 160, 160, ... ],
        #                  "YPts": [ 284, 284, ... ],
        #                  "XCam": [ 1.064, 1.064, ... ],
        #                  "YCam": [ -6.0055, -6.0055, ... ],
        #                  "Time": [ 0.205642, 0.288975, ... ] }
        #
        # PositionIndex == DotNum
        # Timestamp == Time, but no guarantee on order. Unclear if that is an issue or not
        dotinfo = {
            "DotNum": [],
            "XPts": [],
            "YPts": [],
            "XCam": [],
            "YCam": [],
            "Confidence": [],
            "Time": []
        }

        output_path = os.path.join(data_directory, directory)

        faceInfoDict = newFaceInfoDict()

        # frames.json - ["00000.jpg","00001.jpg"]
        frames = []

        facegrid = {"X": [], "Y": [], "W": [], "H": [], "IsValid": []}

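        # deterministic 80/10/10 train/val/test split keyed on session index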
        if directory_idx % 10 < 8:
            dataset_split = "train"
        elif directory_idx % 10 < 9:
            dataset_split = "val"
        else:
            dataset_split = "test"

        # info.json - {"TotalFrames":99,"NumFaceDetections":97,"NumEyeDetections":56,"Dataset":"train","DeviceName":"iPhone 6"}
        info = {
            "TotalFrames": total_captures,
            "NumFaceDetections": 0,
            "NumEyeDetections": 0,
            "Dataset": dataset_split,
            "DeviceName": info_data["DeviceName"]
        }

        # screen.json - { "H": [ 568, 568, ... ], "W": [ 320, 320, ... ], "Orientation": [ 1, 1, ... ] }
        screen = {"H": [], "W": [], "Orientation": []}

        if not os.path.exists(output_path):
            os.mkdir(output_path)

        multi_progress_bar.addSubProcess(index=directory_idx,
                                         max_value=total_captures)

        for capture_idx, capture in enumerate(captures):
            capture_json_path = os.path.join(data_directory, directory,
                                             "frames", capture + ".json")
            capture_jpg_path = os.path.join(data_directory, directory,
                                            "frames", capture + ".jpg")

            try:
                if os.path.isfile(capture_json_path) and os.path.isfile(
                        capture_jpg_path):
                    capture_data = loadJsonData(capture_json_path)

                    capture_image = PILImage.open(capture_jpg_path)
                    capture_image_np = np.array(
                        capture_image
                    )  # dlib wants images in numpy array format

                    shape_np, isValid = find_face_dlib(capture_image_np)

                    info["NumFaceDetections"] = info["NumFaceDetections"] + 1

                    face_rect, left_eye_rect, right_eye_rect, isValid = landmarksToRects(
                        shape_np, isValid)

                    # facegrid.json - { "X": [ 6, 6, ... ], "Y": [ 10, 10, ... ], "W": [ 13, 13, ... ], "H": [ 13, 13, ... ], "IsValid": [ 1, 1, ... ] }
                    if isValid:
                        faceGridX, faceGridY, faceGridW, faceGridH = generate_face_grid_rect(
                            face_rect, capture_image.width,
                            capture_image.height)
                    else:
                        faceGridX = 0
                        faceGridY = 0
                        faceGridW = 0
                        faceGridH = 0

                    facegrid["X"].append(faceGridX)
                    facegrid["Y"].append(faceGridY)
                    facegrid["W"].append(faceGridW)
                    facegrid["H"].append(faceGridH)
                    facegrid["IsValid"].append(isValid)

                    faceInfoDict, faceInfoIdx = faceEyeRectsToFaceInfoDict(
                        faceInfoDict, face_rect, left_eye_rect, right_eye_rect,
                        isValid)
                    info["NumEyeDetections"] = info["NumEyeDetections"] + 1

                    # screen.json - { "H": [ 568, 568, ... ], "W": [ 320, 320, ... ], "Orientation": [ 1, 1, ... ] }
                    screen["H"].append(screen_data['H'][capture_idx])
                    screen["W"].append(screen_data['W'][capture_idx])
                    screen["Orientation"].append(
                        screen_data['Orientation'][capture_idx])

                    # dotinfo.json - { "DotNum": [ 0, 0, ... ],
                    #                  "XPts": [ 160, 160, ... ],
                    #                  "YPts": [ 284, 284, ... ],
                    #                  "XCam": [ 1.064, 1.064, ... ],
                    #                  "YCam": [ -6.0055, -6.0055, ... ],
                    #                  "Confidence": [ 59.3, 94.2, ... ],
                    #                  "Time": [ 0.205642, 0.288975, ... ] }
                    #
                    # PositionIndex == DotNum
                    # Timestamp == Time, but no guarantee on order. Unclear if that is an issue or not
                    x_raw = capture_data["XRaw"]
                    y_raw = capture_data["YRaw"]
                    x_cam, y_cam = screen2cam(
                        x_raw, y_raw, screen_data['Orientation'][capture_idx],
                        screen_data["W"][capture_idx],
                        screen_data["H"][capture_idx], info_data["DeviceName"])
                    confidence = capture_data["Confidence"]

                    dotinfo["DotNum"].append(capture_idx)
                    dotinfo["XPts"].append(x_raw)
                    dotinfo["YPts"].append(y_raw)
                    dotinfo["XCam"].append(x_cam)
                    dotinfo["YCam"].append(y_cam)
                    dotinfo["Confidence"].append(confidence)
                    dotinfo["Time"].append(
                        0)  # TODO replace with timestamp as needed

                    frame_name = f"{capture}.jpg"
                    frames.append(frame_name)
                else:
                    print(f"Error file doesn't exists: {directory}/{capture}")
            except json.decoder.JSONDecodeError:
                print(f"Error processing file: {directory}/{capture}")

            multi_progress_bar.update(index=directory_idx,
                                      value=capture_idx + 1)

        with open(os.path.join(output_path, 'frames.json'), "w") as write_file:
            json.dump(frames, write_file)
        with open(os.path.join(output_path, 'screen.json'), "w") as write_file:
            json.dump(screen, write_file)
        with open(os.path.join(output_path, 'info.json'), "w") as write_file:
            json.dump(info, write_file)
        with open(os.path.join(output_path, 'dotInfo.json'),
                  "w") as write_file:
            json.dump(dotinfo, write_file)
        with open(os.path.join(output_path, 'faceGrid.json'),
                  "w") as write_file:
            json.dump(facegrid, write_file)
        with open(os.path.join(output_path, 'dlibFace.json'),
                  "w") as write_file:
            json.dump(faceInfoDict["Face"], write_file)
        with open(os.path.join(output_path, 'dlibLeftEye.json'),
                  "w") as write_file:
            json.dump(faceInfoDict["LeftEye"], write_file)
        with open(os.path.join(output_path, 'dlibRightEye.json'),
                  "w") as write_file:
            json.dump(faceInfoDict["RightEye"], write_file)
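
For reference, the per-session files this example touches, read directly off the paths in the code above:

# reads:  info.json, screen.json, frames/<capture>.json, frames/<capture>.jpg
# writes: frames.json, screen.json, info.json, dotInfo.json, faceGrid.json,
#         dlibFace.json, dlibLeftEye.json, dlibRightEye.json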
Example #3
def main():
    args = parse_arguments()

    device_name = args.device_name

    if device_name is None:
        print("Invalid argument - must specify device_name")
        return

    color_space = args.color_space

    use_torch = False
    use_onnx = False

    if args.mode == "torch":
        use_torch = True
    elif args.mode == "onnx":
        use_onnx = True
    else:
        print(f"Invalid argument - must specify valid mode: {args.mode}")
        return

    if use_torch:
        model = initialize_torch(args.torch_model_path, args.model_type,
                                 args.device, color_space)
    elif use_onnx:
        session = initialize_onnx(args.onnx_model_path, args.device)

    monitor = get_monitors()[0]  # Assume only one monitor

    cap = cv2.VideoCapture(0)

    normalize_image = normalize_image_transform(image_size=IMAGE_SIZE,
                                                jitter=False,
                                                split='test',
                                                color_space=color_space)

    target = 0

    stimulusX, stimulusY = change_target(target, monitor, device_name)

    screenOffsetX = 0
    screenOffsetY = 100

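    # optional: initialize the eye-gaze HID driver so predictions can be
    # forwarded to the OS inside the loop below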
    if args.gazehid:
        eyeGazeIoctlDll.InitializeEyeGaze()

    while True:
        _, webcam_image = cap.read()

        display = np.zeros(
            (monitor.height - screenOffsetY, monitor.width - screenOffsetX, 3),
            dtype=np.uint8)

        shape_np, isValid = find_face_dlib(webcam_image)

        display = generate_baseline_display_data(display, screenOffsetX,
                                                 screenOffsetY, webcam_image)

        if isValid:
            face_rect, left_eye_rect, right_eye_rect, isValid = rc_landmarksToRects(
                shape_np, isValid)

        if isValid:
            face_image, left_eye_image, right_eye_image, face_grid_image = grid_generate_face_eye_images(
                face_rect, left_eye_rect, right_eye_rect, webcam_image)
            # print(face_image.shape, face_grid_image.shape)
            # OpenCV BGR -> PIL RGB conversion
            image_eye_left, image_eye_right, image_face, image_face_grid = grid_prepare_image_inputs(
                face_image, left_eye_image, right_eye_image, face_grid_image)
            # print(face_grid_image.size, face_grid_image.size)
            # PIL RGB -> PIL YCbCr, then convert images into tensors
            tensor_eye_left, tensor_eye_right, tensor_face, tensor_face_grid = grid_prepare_image_tensors(
                color_space, image_face, image_eye_left, image_eye_right,
                image_face_grid, normalize_image, args.device)

            start_time = datetime.now()
            if use_torch:
                gaze_prediction_np = run_torch_inference(
                    model, tensor_face, tensor_eye_left, tensor_eye_right,
                    tensor_face_grid)
            elif use_onnx:
                gaze_prediction_np = run_onnx_inference(
                    session, tensor_face, tensor_eye_left, tensor_eye_right,
                    tensor_face_grid)

            time_elapsed = datetime.now() - start_time

            if args.gazehid:
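                # map the camera-space prediction to screen pixels for the
                # HID gaze report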
                (gazePredictionScreenPixelXFromCamera,
                 gazePredictionScreenPixelYFromCamera) = cam2screen(
                     gaze_prediction_np[0],
                     gaze_prediction_np[1],
                     1,
                     monitor.width,
                     monitor.height,
                     deviceName=device_name)
                timestamp = c_int64(pandas.Timestamp.utcnow().to_datetime64())

                print("SendGazeReport[", gazePredictionScreenPixelXFromCamera,
                      ", ", gazePredictionScreenPixelYFromCamera, ", ",
                      timestamp, "]")
                eyeGazeIoctlDll.SendGazeReportPixel(
                    int(gazePredictionScreenPixelXFromCamera),
                    int(gazePredictionScreenPixelYFromCamera), timestamp)

            display = generate_display_data(display, face_image,
                                            left_eye_image, right_eye_image,
                                            face_grid_image,
                                            gaze_prediction_np, monitor,
                                            stimulusX, stimulusY, time_elapsed,
                                            device_name)

        cv2.imshow("display", display)

        k = cv2.waitKey(5) & 0xFF
        if k == 27:  # ESC
            break
        if k == 32:  # Space
            target = (target + 1) % len(TARGETS)
            stimulusX, stimulusY = change_target(target, monitor, device_name)

    cv2.destroyAllWindows()
    cap.release()
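
parse_arguments is not shown in the listing. The following is a plausible sketch inferred only from the attributes the example reads; every flag name, default, and choice is an assumption:

import argparse

def parse_arguments():
    # hypothetical reconstruction, not the repo's actual definition
    parser = argparse.ArgumentParser()
    parser.add_argument('--device_name', help='device SKU, e.g. "iPhone 6"')
    parser.add_argument('--color_space', default='YCbCr')
    parser.add_argument('--model_type', default='resNet')
    parser.add_argument('--mode', choices=['torch', 'onnx'], default='torch')
    parser.add_argument('--torch_model_path')
    parser.add_argument('--onnx_model_path')
    parser.add_argument('--device', default='cpu')
    parser.add_argument('--gazehid', action='store_true')
    return parser.parse_args()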
Example #4
print(f"Found {total_directories} directories")

for directory_idx, directory in enumerate(directories):
    print(f"Processing {directory_idx + 1}/{total_directories}")

    recording_path = os.path.join(data_directory, directory)
    output_path = os.path.join(output_directory, directory)
    filenames = loadJsonData(os.path.join(recording_path, "frames.json"))

    faceInfoDict = newFaceInfoDict()
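    # run dlib landmark detection on every frame and accumulate the
    # face/eye rects for this session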
    for idx, filename in enumerate(filenames):
        image_path = os.path.join(recording_path, "frames", filename)
        image = PILImage.open(image_path)
        image = np.array(image.convert('RGB'))
        shape_np, isValid = find_face_dlib(image)
        face_rect, left_eye_rect, right_eye_rect, isValid = landmarksToRects(
            shape_np, isValid)

        faceInfoDict, faceInfoIdx = faceEyeRectsToFaceInfoDict(
            faceInfoDict, face_rect, left_eye_rect, right_eye_rect, isValid)

    if not os.path.exists(output_directory):
        os.mkdir(output_directory)
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    with open(os.path.join(output_path, 'dlibFace.json'), "w") as write_file:
        json.dump(faceInfoDict["Face"], write_file)
    with open(os.path.join(output_path, 'dlibLeftEye.json'),
              "w") as write_file:
        json.dump(faceInfoDict["LeftEye"], write_file)