Example #1
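This function assumes the imports and constants below; the project-local module paths and the constant values are assumptions inferred from how the names are used, not confirmed from the repository.

import logging
import os
import sys

import cv2
import pyautogui

# Project-local modules (paths assumed from usage):
from input_feeder import InputFeeder
from mouse_controller import MouseController
from face_detection import FaceDetectionModel
from facial_landmarks_detection import FacialLandmarksDetectionModel
from head_pose_estimation import HeadPoseEstimationModel
from gaze_estimation import GazeEstimationModel

# Display and annotation constants used below (values assumed):
FRAME_WIDTH, FRAME_HEIGHT = 500, 500
GAZE_ARROW_LENGTH, GAZE_ARROW_WIDTH = 160, 2

# build_argparser() is expected to be defined elsewhere in this script.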
def main():
    args = build_argparser().parse_args()

    preview_flags = args.preview_flags

    logger = logging.getLogger()
    input_path = args.input

    if input_path.lower() == 'cam':
        input_feed = InputFeeder('cam')
    else:
        if not os.path.isfile(input_path):
            logger.error('Unable to find specified video file')
            exit(1)
        file_extension = os.path.splitext(input_path)[1].lstrip('.').lower()
        if file_extension in ('jpg', 'jpeg', 'bmp'):
            input_feed = InputFeeder('image', input_path)
        elif file_extension in ('avi', 'mp4'):
            input_feed = InputFeeder('video', input_path)
        else:
            logger.error(
                "Unsupported file Extension. Allowed ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']"
            )
            exit(1)

    if sys.platform.startswith("linux"):
        CODEC = cv2.VideoWriter_fourcc(*"mp4v")
    elif sys.platform == "darwin":
        CODEC = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    else:
        print("Unsupported OS.")
        exit(1)

    file_flag = False
    if args.output_file.lower() == 'y':
        file_flag = True
        out = cv2.VideoWriter('output.mp4', CODEC, 30,
                              (FRAME_WIDTH, FRAME_HEIGHT))

    modelPathDict = {
        'face_detect': args.face_detection_model,
        'face_landmark_regress': args.facial_landmark_model,
        'head_pose': args.head_pose_model,
        'gaze_estimate': args.gaze_estimation_model
    }

    for model_name, model_path in modelPathDict.items():
        if not os.path.isfile(model_path):
            logger.error('Unable to find specified ' + model_name +
                         ' xml file')
            exit(1)

    # initializing models
    fdm = FaceDetectionModel(modelPathDict['face_detect'], args.device,
                             args.cpu_extension)
    fldm = FacialLandmarksDetectionModel(
        modelPathDict['face_landmark_regress'], args.device,
        args.cpu_extension)
    hpem = HeadPoseEstimationModel(modelPathDict['head_pose'], args.device,
                                   args.cpu_extension)
    gem = GazeEstimationModel(modelPathDict['gaze_estimate'], args.device,
                              args.cpu_extension)

    # initializing mouse controller
    mouse_controller = MouseController('medium', 'fast')

    input_feed.load_data()

    # checking models
    fdm.check_model()
    fldm.check_model()
    hpem.check_model()
    gem.check_model()

    # loading models / creating executable networks
    fdm.load_model()
    fldm.load_model()
    hpem.load_model()
    gem.load_model()

    frame_count = 0
    for ret, frame in input_feed.next_batch():
        if not ret:
            break

        frame_count += 1

        key = cv2.waitKey(60)
        """
        Sequence of model execution:-
        1. Predict from each model.
        2. Preprocess of outputs from each model.
        3. Send the processed output to the next model.

        Model Sequence:- 
                                -   Head Pose Estimation Model      -
        Face Detection Model <(First Head Pose and Then Facial Landmark)>Gaze Estimation Model 
                                -   Facial Landmark Detection Model -  
        """

        cropped_face, face_coords = fdm.preprocess_output(
            frame.copy(), fdm.predict(frame.copy()), args.prob_threshold)

        # preprocess_output returns an int flag instead of a crop when no
        # face was detected
        if isinstance(cropped_face, int):
            logger.error('Unable to detect the face.')
            if key == 27:
                break
            continue

        hp_out = hpem.preprocess_output(hpem.predict(cropped_face.copy()))

        left_eye, right_eye, eye_coords = fldm.preprocess_output(
            cropped_face.copy(), fldm.predict(cropped_face.copy()))

        new_mouse_coord, gaze_vector = gem.preprocess_output(
            gem.predict(left_eye, right_eye, hp_out), hp_out)

        if preview_flags or file_flag:
            preview_frame = frame.copy()

            if 'fd' in preview_flags:
                preview_frame = cv2.rectangle(preview_frame,
                                              (face_coords[0], face_coords[1]),
                                              (face_coords[2], face_coords[3]),
                                              (0, 0, 255), 3)
                cropped_face = preview_frame[face_coords[1]:face_coords[3],
                                             face_coords[0]:face_coords[2]]

            if 'fld' in preview_flags:
                cropped_face = cv2.rectangle(
                    cropped_face,
                    (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                    (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                    (0, 255, 0), 3)
                cropped_face = cv2.rectangle(
                    cropped_face,
                    (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                    (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                    (0, 255, 0), 3)

                preview_frame[face_coords[1]:face_coords[3],
                              face_coords[0]:face_coords[2]] = cropped_face

            if 'hp' in preview_flags:
                cv2.putText(
                    preview_frame,
                    'Pose Angles: yaw: {:.2f} | pitch: {:.2f} | roll: {:.2f}'.
                    format(hp_out[0], hp_out[1], hp_out[2]), (20, 40),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

            if 'ge' in preview_flags:

                x, y = int(gaze_vector[0] * GAZE_ARROW_LENGTH), -int(
                    gaze_vector[1] * GAZE_ARROW_LENGTH)

                le_mid_x = int((eye_coords[0][0] + eye_coords[0][2]) / 2)
                le_mid_y = int((eye_coords[0][1] + eye_coords[0][3]) / 2)
                re_mid_x = int((eye_coords[1][0] + eye_coords[1][2]) / 2)
                re_mid_y = int((eye_coords[1][1] + eye_coords[1][3]) / 2)

                cv2.arrowedLine(cropped_face, (le_mid_x, le_mid_y),
                                ((le_mid_x + x), (le_mid_y + y)), (255, 0, 0),
                                GAZE_ARROW_WIDTH)
                cv2.arrowedLine(cropped_face, (re_mid_x, re_mid_y),
                                ((re_mid_x + x), (re_mid_y + y)), (255, 0, 0),
                                GAZE_ARROW_WIDTH)

                preview_frame[face_coords[1]:face_coords[3],
                              face_coords[0]:face_coords[2]] = cropped_face

            if preview_flags and frame_count % 2 == 0:
                if args.zoomed:
                    cv2.imshow(
                        'Cropped Face',
                        cv2.resize(cropped_face, (FRAME_WIDTH, FRAME_HEIGHT)))
                else:
                    cv2.imshow(
                        'Preview',
                        cv2.resize(preview_frame, (FRAME_WIDTH, FRAME_HEIGHT)))

            if file_flag:
                out.write(
                    cv2.resize(preview_frame, (FRAME_WIDTH, FRAME_HEIGHT)))

        # move the mouse pointer; pyautogui raises FailSafeException when the
        # cursor hits a screen corner (its built-in fail-safe), so skip that
        # frame and keep processing
        try:
            mouse_controller.move(new_mouse_coord[0], new_mouse_coord[1])
        except pyautogui.FailSafeException:
            pass

        if frame_count % 2 == 0 and not preview_flags:
            cv2.imshow('Video', cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT)))

        if key == 27:
            break

    logger.info('VideoStream ended.')
    if file_flag:
        out.release()
    input_feed.close()
    cv2.destroyAllWindows()
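
The loop above consumes an InputFeeder helper that is not shown. Below is a minimal sketch of one, assuming it wraps cv2.VideoCapture (or cv2.imread for stills) and yields (ret, frame) pairs the way next_batch() is used here.

import cv2

class InputFeeder:
    """Minimal sketch (assumed implementation): wraps an image, video file,
    or webcam and yields (ret, frame) pairs like the loops above expect."""

    def __init__(self, input_type, input_file=None):
        self.input_type = input_type
        self.input_file = input_file

    def load_data(self):
        if self.input_type == 'video':
            self.cap = cv2.VideoCapture(self.input_file)
        elif self.input_type == 'cam':
            self.cap = cv2.VideoCapture(0)
        else:  # single image
            self.cap = cv2.imread(self.input_file)

    def next_batch(self):
        if self.input_type == 'image':
            yield self.cap is not None, self.cap
            return
        while self.cap.isOpened():
            ret, frame = self.cap.read()
            yield ret, frame
            if not ret:
                break

    def close(self):
        if self.input_type != 'image':
            self.cap.release()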
Example #2
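As in Example #1, this variant assumes logging, cv2, and the project-local helper modules; the module paths for Landmark_Model, Gaze_Estimation_Model, and Head_Pose_Model are likewise assumptions inferred from usage.

import logging

import cv2

from input_feeder import InputFeeder              # assumed module path
from mouse_controller import MouseController      # assumed module path
from face_detection import FaceDetectionModel     # assumed module path
from facial_landmarks_detection import Landmark_Model   # assumed module path
from gaze_estimation import Gaze_Estimation_Model        # assumed module path
from head_pose_estimation import Head_Pose_Model         # assumed module path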
def main():
    args = build_argparser().parse_args()
    logger = logging.getLogger()

    # visualization flags read in the main loop below; the attribute name
    # on the parsed args is an assumption
    previewFlags = args.previewFlags

    if args.input_type in ('video', 'image'):
        feeder = InputFeeder(args.input_type, args.input)
    elif args.input_type == 'cam':
        feeder = InputFeeder(args.input_type)
    else:
        logger.error('Invalid input type: ' + str(args.input_type))
        exit(1)

    mc = MouseController("medium", "fast")
    feeder.load_data()

    face_model = FaceDetectionModel(args.facedetectionmodel, args.device,
                                    args.cpu_extension)
    face_model.check_model()

    landmark_model = Landmark_Model(args.facelandmarkmodel, args.device,
                                    args.cpu_extension)
    landmark_model.check_model()

    gaze_model = Gaze_Estimation_Model(args.gazeestimationmodel, args.device,
                                       args.cpu_extension)
    gaze_model.check_model()

    head_model = Head_Pose_Model(args.headposemodel, args.device,
                                 args.cpu_extension)
    head_model.check_model()

    face_model.load_model()
    logger.info("Face Detection Model Loaded...")

    landmark_model.load_model()
    logger.info("Landmark Detection Model Loaded...")

    head_model.load_model()
    logger.info("Head Pose Detection Model Loaded...")

    gaze_model.load_model()
    logger.info("Gaze Estimation Model Loaded...")

    logger.info('All Models are loaded\n\n')
    out = cv2.VideoWriter('output_video.mp4',
                          cv2.VideoWriter_fourcc(*'mp4v'), 30, (500, 500))

    frame_count = 0
    for ret, frame in feeder.next_batch():
        if not ret:
            break
        frame_count += 1

        if frame_count % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))
        key = cv2.waitKey(60)
        # face detection first; every downstream model consumes the crop
        faceROI, box = face_model.predict(frame.copy(), args.prob_threshold)
        if faceROI is None:
            logger.error("Unable to detect the face.")
            if key == 27:
                break
            continue

        ((lefteye_x, lefteye_y), (righteye_x, righteye_y), eye_coords,
         left_eye, right_eye) = landmark_model.predict(faceROI.copy(),
                                                       EYE_ROI=10)
        head_position = head_model.predict(faceROI.copy())
        new_mouse_coord, gaze_vector = gaze_model.predict(
            left_eye.copy(), right_eye.copy(), head_position)

        if previewFlags:
            preview_frame = frame.copy()
            if 'fd' in previewFlags:
                preview_frame = faceROI
            if 'fld' in previewFlags:
                cv2.rectangle(
                    faceROI,
                    (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                    (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                    (0, 255, 0), 3)
                cv2.rectangle(
                    faceROI,
                    (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                    (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                    (0, 255, 0), 3)
            if 'hp' in previewFlags:
                cv2.putText(
                    preview_frame,
                    "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                    format(head_position[0], head_position[1],
                           head_position[2]), (10, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
            if 'ge' in previewFlags:
                # draw a cross on each eye crop along the gaze direction,
                # then paste the crops back into the face ROI
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] *
                                                        12), 160
                le = cv2.line(left_eye.copy(), (x - w, y - w),
                              (x + w, y + w), (255, 0, 255), 2)
                cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255),
                         2)
                re = cv2.line(right_eye.copy(), (x - w, y - w),
                              (x + w, y + w), (255, 0, 255), 2)
                cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255),
                         2)
                faceROI[eye_coords[0][1]:eye_coords[0][3],
                        eye_coords[0][0]:eye_coords[0][2]] = le
                faceROI[eye_coords[1][1]:eye_coords[1][3],
                        eye_coords[1][0]:eye_coords[1][2]] = re
            out.write(cv2.resize(preview_frame, (500, 500)))

        if frame_count % 5 == 0:
            mc.move(new_mouse_coord[0], new_mouse_coord[1])
        if key == 27:
            break

    logger.info("VideoStream ended...")
    out.release()
    cv2.destroyAllWindows()
    feeder.close()
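
Both examples move the cursor through a MouseController helper that is not shown. Below is a minimal sketch, assuming it translates the 'precision' and 'speed' labels into pyautogui.moveRel() arguments; the mapping values are assumptions.

import pyautogui

class MouseController:
    """Minimal sketch (assumed implementation): maps the 'precision' and
    'speed' labels used above onto pyautogui.moveRel() arguments."""

    def __init__(self, precision, speed):
        precision_dict = {'high': 100, 'low': 1000, 'medium': 500}
        speed_dict = {'fast': 1, 'slow': 10, 'medium': 5}
        self.precision = precision_dict[precision]
        self.speed = speed_dict[speed]

    def move(self, x, y):
        # x and y are normalized gaze offsets; the y axis is flipped
        # because screen coordinates grow downward
        pyautogui.moveRel(x * self.precision, -1 * y * self.precision,
                          duration=self.speed)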