Exemplo n.º 1
0
def main():
    """
    Load inference networks, stream video to network,
    and output stats and video.
    :return: None
    """

    # Logger init
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

    # Get command line args
    args = get_arg()

    #Load Preferencies
    with open(args.config_file, "r") as yamlfile:
        cfg = yaml.load(yamlfile, Loader=yaml.FullLoader)
    models = cfg['models']
    input_source = args.input
    video_path = cfg['video_path']
    face_model = FaceDetection(models['face_detection'])
    head_pose_model = HeadPoseEstimation(models['head_pose_estimation'])
    facial_landmarks_model = FacialLandmarksDetection(models['facial_landmarks_detection'])
    gaze_estimation_model = GazeEstimation(models['gaze_estimation'])

    # Initialise the MouseController
    mouse_contr = MouseController("low","fast")

    # Load the models and log timing
    start_time = time.time()
    face_model.load_model(args.device)
    logging.info("Load Face Detection model: {:.1f}ms".format(1000 * (time.time() - start_time)) )

    start_time = time.time()
    facial_landmarks_model.load_model(args.device)
    logging.info("Load Facial Landmarks Detection model: {:.1f}ms".format(1000 * (time.time() - start_time)) )

    start_time = time.time()
    head_pose_model.load_model(args.device)
    logging.info("Load Head Pose Estimation model: {:.1f}ms".format(1000 * (time.time() - start_time)) )

    start_time = time.time()
    gaze_estimation_model.load_model(args.device) 
    logging.info("Load Gaze Estimation model: {:.1f}ms".format(1000 * (time.time() - start_time)) )

    # Get and open video or camera capture
    #input_feed = InputFeeder('video', args.input)
    #input_feed.load_data()

    input_feed = InputFeeder(input_type=input_source, input_file=video_path)
    input_feed.load_data()

    if not input_feed.cap.isOpened():
        log.critical('Error opening input, check --video_path parameter')
        sys.exit(1)
    # FPS = input_feed.get_fps()

    # Grab the shape of the input 
    # width = input_feed.get_width()
    # height = input_feed.get_height()

    # init scene variables
    frame_count = 0

    ### Loop until stream is over ###
    facedetect_infer_time = 0
    landmark_infer_time = 0
    headpose_infer_time = 0
    gaze_infer_time = 0
    while True:
        # Read the next frame
        try:
            frame = next(input_feed.next_batch())
        except StopIteration:
            break

        if frame is None:
            break


        key_pressed = cv2.waitKey(60)
        frame_count += 1
        input_height, input_width, _ = frame.shape
        logging.info("frame {count} size {w}, {h}".format(count= frame_count, w = input_width, h =input_height)) 
        
        # face detection
        p_frame = face_model.preprocess_input(frame)
        start_time = time.time()
        fnoutput = face_model.predict(p_frame)
        facedetect_infer_time += time.time() - start_time
        out_frame,fboxes = face_model.preprocess_output(fnoutput,frame,args.overlay, args.prob_threshold)
        
        #for each face
        for fbox in fboxes:

            face = frame[fbox[1]:fbox[3],fbox[0]:fbox[2]]
            p_frame = facial_landmarks_model.preprocess_input(face)
            
            start_time = time.time()
            lmoutput = facial_landmarks_model.predict(p_frame)
            landmark_infer_time += time.time() - start_time
            out_frame,left_eye_point,right_eye_point = facial_landmarks_model.preprocess_output(lmoutput, fbox, out_frame,args.overlay, args.prob_threshold)

            # get head pose estimation
            p_frame  = head_pose_model.preprocess_input(face)
            start_time = time.time()
            hpoutput = head_pose_model.predict(p_frame)
            headpose_infer_time += time.time() - start_time
            out_frame, headpose_angels = head_pose_model.preprocess_output(hpoutput,out_frame, face,fbox,args.overlay, args.prob_threshold)

            # get gaze  estimation
            out_frame, left_eye, right_eye  = gaze_estimation_model.preprocess_input(out_frame,face,left_eye_point,right_eye_point,args.overlay)
            start_time = time.time()
            geoutput = gaze_estimation_model.predict(left_eye, right_eye, headpose_angels)
            gaze_infer_time += time.time() - start_time
            out_frame, gazevector = gaze_estimation_model.preprocess_output(geoutput,out_frame,fbox, left_eye_point,right_eye_point,args.overlay, args.prob_threshold)

            cv2.imshow('im', out_frame)
            
            if(args.mouse_move):
                logging.info("mouse move vector : x ={}, y={}".format(gazevector[0], gazevector[1])) 
                mouse_contr.move(gazevector[0], gazevector[1])
            
            #use only first detected face in the frame
            break
        
        # Break if escape key pressed
        if key_pressed == 27:
            break

    #logging inference times
    if(frame_count>0):
        logging.info("***** Models Inference time *****") 
        logging.info("Face Detection:{:.1f}ms".format(1000* facedetect_infer_time/frame_count))
        logging.info("Facial Landmarks Detection:{:.1f}ms".format(1000* landmark_infer_time/frame_count))
        logging.info("Headpose Estimation:{:.1f}ms".format(1000* headpose_infer_time/frame_count))
        logging.info("Gaze Estimation:{:.1f}ms".format(1000* gaze_infer_time/frame_count))


    # Release the capture and destroy any OpenCV windows
    input_feed.close()
    cv2.destroyAllWindows()
Exemplo n.º 2
0
def main():

    inputPath = args.input
    inputFeeder = None

    # Verify and Load Models
    face_model_link = args.facedetection
    facial_landmark_link = args.faciallandmark
    gaze_estimation_link = args.gazeestimation
    head_pose_link = args.headpose
    if not check_model_exists(face_model_link) or not check_model_exists(
            facial_landmark_link) or not check_model_exists(
                gaze_estimation_link) or not check_model_exists(
                    head_pose_link):
        exit(1)

    device_name = args.device
    threshold = args.prob_threshold
    cpu_extension = args.cpu_extension
    previewHeadPose = args.previewHeadPose
    previewFace = args.previewFaceDetection
    previewFaceLandmark = args.previewFaceLandmark
    previewGazeEstimation = args.previewGazeEstimation

    fliph = True if str(args.flip_horizontal).lower() == "true" else False

    # Initialize Models
    face_model = FaceDetectionModel(face_model_link, device_name,
                                    cpu_extension, threshold)
    facial_landmark_model = FacialLandmarksDetectionModel(
        facial_landmark_link, device_name, cpu_extension)
    gaze_estimation_model = GazeEstimationModel(gaze_estimation_link,
                                                device_name, cpu_extension)
    head_pose_model = HeadPoseEstimationModel(head_pose_link, device_name,
                                              cpu_extension)

    if inputPath.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(inputPath):
            logger.error("Unable to find specified video file")
            exit(1)
        inputFeeder = InputFeeder("video", inputPath)

    # Load Models
    fm_time = time.time()
    face_model.load_model()
    fm_time = time.time() - fm_time

    flm_time = time.time()
    facial_landmark_model.load_model()
    flm_time = time.time() - flm_time

    gem_time = time.time()
    gaze_estimation_model.load_model()
    gem_time = time.time() - gem_time

    hpm_time = time.time()
    head_pose_model.load_model()
    hpm_time = time.time() - hpm_time

    benchmarks['loadtime'] = {}
    benchmarks['loadtime']['face_landmark'] = flm_time
    benchmarks['loadtime']['face_detection'] = fm_time
    benchmarks['loadtime']['gaze_estimation'] = gem_time
    benchmarks['loadtime']['head_pose_estimation'] = hpm_time

    mouse_controller = MouseController('high', 'slow')
    inputFeeder.load_data()

    frame_count = 0

    for ret, frame in inputFeeder.next_batch():

        if not ret:
            break

        # Waiting for 10ms for key input
        if cv2.waitKey(1) == 17:
            break

        FPS_COUNT = time.time()
        frame_count += 1
        increase_brightness(frame)
        if fliph:
            frame = cv2.flip(frame, 1)

        face_detection_predict_time = time.time()
        croppedFace, face_coords = face_model.predict(frame.copy())
        face_detection_predict_time = time.time() - face_detection_predict_time

        if croppedFace is None or croppedFace is 0:
            logger.error("Unable to detect the face.")
            continue

        # Head Pose prediction

        head_pose_predict_time = time.time()
        head_output = head_pose_model.predict(croppedFace.copy())
        head_pose_predict_time = time.time() - head_pose_predict_time

        # Facial Landmark prediction

        facial_landmark_predict_time = time.time()
        left_eye, right_eye, eye_coords = facial_landmark_model.predict(
            croppedFace.copy())
        facial_landmark_predict_time = time.time(
        ) - facial_landmark_predict_time

        # Gaze Estimation prediction
        gaze_estimation_predict_time = time.time()
        gaze_vector, raw_vector = gaze_estimation_model.predict(
            left_eye, right_eye, head_output)
        gaze_estimation_predict_time = time.time(
        ) - gaze_estimation_predict_time

        FPS_COUNT = time.time() - FPS_COUNT
        FPS_COUNT = 1 // FPS_COUNT
        logger.debug("FPS %s" % (FPS_COUNT))
        benchmarks['predict_time'] = {}
        benchmarks['predict_time'][
            'face_landmark'] = facial_landmark_predict_time
        benchmarks['predict_time'][
            'face_detection'] = face_detection_predict_time
        benchmarks['predict_time'][
            'gaze_estimation'] = gaze_estimation_predict_time
        benchmarks['predict_time'][
            'head_pose_estimation'] = head_pose_predict_time
        logger.debug(benchmarks)

        if previewFace or previewFaceLandmark or previewGazeEstimation or previewHeadPose:
            preview_frame = frame.copy()
            if previewFace:
                cv2.rectangle(preview_frame, (face_coords[0], face_coords[1]),
                              (face_coords[2], face_coords[3]), (255, 0, 0), 1)
                preview_frame = croppedFace
            if previewFaceLandmark:
                cv2.rectangle(croppedFace,
                              (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                              (0, 255, 0), 1)
                cv2.rectangle(croppedFace,
                              (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                              (0, 255, 0), 1)

            if previewGazeEstimation and gaze_vector:
                x, y, z = gaze_vector
                x = int(x)
                y = int(y)
                # left eye center
                left_eye_center_x = (eye_coords[0][0] + eye_coords[0][2]) // 2
                left_eye_center_y = (eye_coords[0][1] + eye_coords[0][3]) // 2
                left_eye_center_dx = left_eye_center_x + (x * 2)
                left_eye_center_dy = left_eye_center_y + (-y * 2)

                left_eye_ref = left_eye.copy()

                # right eye center
                right_eye_center_x = (eye_coords[1][0] + eye_coords[1][2]) // 2
                right_eye_center_y = (eye_coords[1][1] + eye_coords[1][3]) // 2
                right_eye_center_dx = right_eye_center_x + (x * 2)
                right_eye_center_dy = right_eye_center_y + (-y * 2)

                right_eye_ref = right_eye.copy()

                # head pose
                head_pose_y = head_output[1] * math.pi / 180

                line_size = configuration.GAZE_ARROW_LENGTH

                # gaze-axis
                cv2.arrowedLine(croppedFace,
                                (left_eye_center_x, left_eye_center_y),
                                (left_eye_center_dx, left_eye_center_dy),
                                (0, 255, 255), 1)
                cv2.arrowedLine(croppedFace,
                                (right_eye_center_x, right_eye_center_y),
                                (right_eye_center_dx, right_eye_center_dy),
                                (0, 255, 255), 1)

            if previewHeadPose:
                cv2.rectangle(preview_frame, (5, 5), (85, 65),
                              configuration.UI_COLOR, 1)
                cv2.putText(preview_frame,
                            "YAW: {:.2f}".format(head_output[0]), (10, 20),
                            configuration.DEFAULT_FONT,
                            configuration.FONT_SIZE, configuration.UI_COLOR, 1)
                cv2.putText(preview_frame,
                            "PITCH: {:.2f}".format(head_output[1]), (10, 40),
                            configuration.DEFAULT_FONT,
                            configuration.FONT_SIZE, configuration.UI_COLOR, 1)
                cv2.putText(preview_frame,
                            "ROLL: {:.2f}".format(head_output[2]), (10, 60),
                            configuration.DEFAULT_FONT,
                            configuration.FONT_SIZE, configuration.UI_COLOR, 1)

            cv2.imshow(
                "Gaze Detection [VIsualization]",
                cv2.resize(preview_frame, (configuration.PREVIEW_WIDTH,
                                           configuration.PREVIEW_HEIGHT)))

        if frame_count % configuration.MOVE_MOUSE_AFTER_FRAMES_COUNT == 0:
            logger.debug("moving mouse = x: {},y: {}".format(
                gaze_vector[0], gaze_vector[1]))
            mouse_controller.move(gaze_vector[0], gaze_vector[1])

    logger.info("Video Stream Finished...")
    cv2.destroyAllWindows()
    inputFeeder.close()
Exemplo n.º 3
0
def main():
    # get command line args
    args = build_argparser().parse_args()

    logger = log.getLogger()

    type_input = args.input

    if type_input == "CAM":
        inputFeeder = InputFeeder("cam")
    else:
        inputFeeder = InputFeeder("video", args.input)

    inputFeeder.load_data()

    mc = MouseController("medium", "fast")

    fdm = FaceDetectionModel(model_name=args.face_dectection_model,
                             device=args.device,
                             extensions=args.cpu_extension,
                             threshold=args.prob_threshold)
    fldm = FacialLandmarksModel(model_name=args.face_landmarks_model,
                                device=args.device,
                                extensions=args.cpu_extension)
    gem = GazeEstimationModel(model_name=args.gaze_estimation_model,
                              device=args.device,
                              extensions=args.cpu_extension)
    hpem = HeadPoseEstimationModel(model_name=args.head_pose_model,
                                   device=args.device,
                                   extensions=args.cpu_extension)
    data_capture = {}

    start_time = time.time()
    fdm.load_model()
    fdm_load_time = time.time()
    fldm.load_model()
    fldm_load_time = time.time()
    hpem.load_model()
    hpem_load_time = time.time()
    gem.load_model()
    gem_load_time = time.time()

    data_capture['FaceDetectionModel_loadtime'] = round(
        (fdm_load_time - start_time) * 1000, 3)
    data_capture['FacialLandmarksModel_loadtime'] = round(
        (fldm_load_time - fdm_load_time) * 1000, 3)
    data_capture['HeadPoseEstimationModel_loadtime'] = round(
        (hpem_load_time - fldm_load_time) * 1000, 3)
    data_capture['GazeEstimationModel_loadtime'] = round(
        (gem_load_time - hpem_load_time) * 1000, 3)

    for flag, frame in inputFeeder.next_batch():
        if not flag:
            break

        pressedKey = cv2.waitKey(60)

        start_infer_time = time.time()  # time to start inference
        face_coords, face_img = fdm.predict(frame)
        fdm_infertime = time.time()

        if face_coords == 0:  # if face not detected
            continue

        hpem_out = hpem.predict(face_img)
        hpem_infertime = time.time()

        left_eye, right_eye, eye_coord = fldm.predict(face_img)
        fldm_infertime = time.time()

        if left_eye.all() == 0 or right_eye.all(
        ) == 0:  # if eye are not detected
            continue

        mouse_coord, gaze_vector = gem.predict(left_eye, right_eye, hpem_out)
        gem_infertime = time.time()

        if args.preview:
            output_boxes(frame, (face_coords[0], face_coords[1]),
                         (face_coords[2], face_coords[3]))

            bound_boxes(frame, eye_coord, 45, 25, face_coords[0],
                        face_coords[1])

            text = "Yaw: {:.2f}, Pitch: {:+.2f}, Roll: {:.2f}".format(
                hpem_out[0], hpem_out[1], hpem_out[2])

            output_text(frame, text, (100, 100))

            h = frame.shape[0]
            w = frame.shape[1]

            center_of_face = (h / 2, w / 2, 0)

            draw_axes(frame,
                      center_of_face,
                      hpem_out[0],
                      hpem_out[1],
                      hpem_out[2],
                      scale=50,
                      focal_length=950)

            cv2.imshow('video', cv2.resize(frame, (500, 500)))

        cv2.imshow('video', cv2.resize(frame, (500, 500)))
        mc.move(mouse_coord[0], mouse_coord[1])

        if pressedKey == 27:
            break

    data_capture['FaceDetectionModel_Inferencetime'] = round(
        (fdm_infertime - start_infer_time) * 1000, 3)
    data_capture['HeadPoseEstimationModel_Inferencetime'] = round(
        (hpem_infertime - fdm_infertime) * 1000, 3)
    data_capture['FacialLandmarksModel_Inferencetime'] = round(
        (fldm_infertime - hpem_infertime) * 1000, 3)
    data_capture['GazeEstimationModel_Inferencetime'] = round(
        (gem_infertime - fldm_infertime) * 1000, 3)

    total_time = round((time.time() - start_infer_time) * 1000, 3)
    data_capture['Total_time'] = total_time

    df = pd.DataFrame.from_dict(data_capture,
                                orient='index',
                                columns=['time(msecs)'])
    df.to_csv("results.csv")

    logger.error("Video has ended...")
    cv2.destroyAllWindows()
    inputFeeder.close()
Exemplo n.º 4
0
def infer_on_stream(args):

    prob_threshold = args.prob_threshold

    face_detector_path = args.face_detection_model
    facial_landmark_path = args.facial_landmark_detection_model
    head_pose_path = args.head_pose_estimation_model
    gaze_est_path = args.gaze_estimation_model
    input_display = args.display_type

    device = args.device
    extension = args.cpu_extension
    input_file = args.input

    speed = args.mouse_speed
    precision = args.mouse_precision

    face_detector = Model_Face_Detect(model_name=face_detector_path,
                                      device=device,
                                      extensions=extension)
    log.info("face_detector object intitialised")
    face_landmark_detector = Model_Facial_Land(model_name=facial_landmark_path,
                                               device=device,
                                               extensions=extension)
    log.info("face_landmark_detector object initialised")
    head_pose_estimation = Model_HeadPos(model_name=head_pose_path,
                                         device=device,
                                         extensions=extension)
    log.info("head_pose_estimation object initialised")
    gaze_estimation = Model_Gaze_Est(model_name=gaze_est_path,
                                     device=device,
                                     extensions=extension)
    log.info("gaze_estimation object initialised")

    model_loading = time.time()

    start_time = time.time()
    face_detector.load_model()
    log.info("Face Detector Model Loaded...")
    face_landmark_detector.load_model()
    log.info("Facial Landmark Model Loaded...")
    head_pose_estimation.load_model()
    log.info("Head Pose Estimation Model Loaded...")
    gaze_estimation.load_model()
    log.info("Gaze Estimation Model Loaded...")
    total_models_load_time = time.time() - start_time

    try:
        input_feeder = InputFeeder(input_display, input_file)
        input_feeder.load_data()
    except:
        log.error("Something went wrong with loading camera/mouse")
        exit(0)

    mouse = MouseController(precision, speed)
    frames = 0

    start_inf_time = time.time()
    for ret, frame in input_feeder.next_batch():
        if not ret:
            break
        frames += 1

        key = cv2.waitKey(60)

        start_inf_disp = time.time()

        #original = "original"
        #cv2.namedWindow(original)        # Create a named window
        #cv2.moveWindow(original, 600,200)  # Move it to (40,30)
        #cv2.imshow(original,cv2.resize(frame,(600,600)))
        # Start inference on face_detection model
        face_coords, face_image = face_detector.predict(frame, prob_threshold)

        if (face_coords):

            # Start inference on face_landmarks_detection model
            eye_coords, left_eye, right_eye, image_proccess = face_landmark_detector.predict(
                face_image)

            # Start inference on head pose estimation model
            head_pose_angles = head_pose_estimation.predict(face_image)

            # Start inference on gaze estimation model
            mouse_coord, gaze_coord = gaze_estimation.predict(
                left_eye, right_eye, head_pose_angles)

            left_eye = (eye_coords[0][0] + 15, eye_coords[0][1] + 15)
            right_eye = (eye_coords[1][0] + 15, eye_coords[1][1] + 15)

            gaze_x = int(gaze_coord[0] * 250)
            gaze_y = int(-gaze_coord[1] * 250)

            cv2.arrowedLine(image_proccess, left_eye,
                            (left_eye[0] + gaze_x, left_eye[1] + gaze_y),
                            (80, 15, 120), 3)
            cv2.arrowedLine(image_proccess, right_eye,
                            (right_eye[0] + gaze_x, right_eye[1] + gaze_y),
                            (80, 15, 120), 3)

            inference_time = time.time() - start_inf_disp

            inf_time_display="Inference Time Per Frame: {:.3f}ms"\
                                .format(inference_time*1000)

            cv2.putText(image_proccess, inf_time_display, (10, 10),
                        cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 250, 0), 1)

            infer_img = "process_img"
            cv2.namedWindow(infer_img)
            cv2.moveWindow(infer_img, 10, 200)  # Move it to (10,200)
            cv2.imshow(infer_img, cv2.resize(image_proccess, (600, 600)))

            mouse.move(mouse_coord[0], mouse_coord[1])

    total_inference_time = time.time() - start_inf_time
    fps = int(frames) / (total_inference_time)

    with open(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'stats_' + str(device) + '.txt'), 'w') as f:
        f.write("Inference Time: {:.3f}"\
                                 .format(total_inference_time)+'\n')
        f.write("FPS:    {:.3f}"\
                                 .format(fps)+'\n')
        f.write("Model Loading Time:  {:.3f}"\
                                 .format(total_models_load_time)+'\n')

    input_feeder.close()
    cv2.destroyAllWindows()
Exemplo n.º 5
0
def main():
    """
    """

    # Grab command line args
    args = build_argparser().parse_args()

    input_src = args.input
    device = args.device
    extension = args.cpu_extension
    prob_threshold = args.prob_threshold

    face_detection_model = args.facedetectionmodel
    head_pose_model = args.headposemodel
    landmarks_model = args.facelandmarksnmodel
    gaze_estimation_model = args.gazeestimationmodel

    # Create log object set for console output and set log level
    log_obj = log.getLogger()
    log_obj.setLevel(LOGLEVEL)

    console_handler = log.StreamHandler()
    console_handler.setLevel(LOGLEVEL)
    log_obj.addHandler(console_handler)

    # Create detection objects
    face_detection_obj = FaceDetectionModel(face_detection_model, device,
                                            extension)
    head_pose_obj = HeadPoseModel(head_pose_model, device, extension)
    landmarks_obj = LandmarksModel(landmarks_model, device, extension)
    gaze_estimation_obj = GazeEstimationModel(gaze_estimation_model, device,
                                              extension)

    # Create mouse controller object
    mouse_controller = MouseController('medium', 'fast')
    # Place mouse at the center of the screen
    mouse_controller.init_position()
    log_obj.info("[Info]: Place mouse at the center of the screen")

    # Place holder for total inferencing time
    total_inference_time = 0

    # Load models and get the model loading times
    start_time = time.time()
    face_detection_obj.load_model()
    end_time = time.time()
    face_detection_loading_time = end_time - start_time

    start_time = time.time()
    head_pose_obj.load_model()
    end_time = time.time()
    head_pose_loading_time = end_time - start_time

    start_time = time.time()
    landmarks_obj.load_model()
    end_time = time.time()
    landmarks_detection_loading_time = end_time - start_time

    start_time = time.time()
    gaze_estimation_obj.load_model()
    end_time = time.time()
    gaze_estimation_loading_time = end_time - start_time

    # Configure input video source
    if input_src.lower() == 'cam':
        input_channel = InputFeeder(input_type='cam')
    elif not os.path.exists(input_src):
        log.error("Video file not found! Exiting....")
        exit(1)
    else:
        input_channel = InputFeeder(input_type='video', input_file=input_src)
        log_obj.info("[Info]: Opening video file ...")

    input_channel.load_data()
    video_width = int(input_channel.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(input_channel.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(input_channel.cap.get(cv2.CAP_PROP_FPS))

    frame_counter = 0
    total_face_inf_time = 0
    total_head_inf_time = 0
    total_lanmarks_inf_time = 0
    total_gaze_inf_time = 0
    frame_processing_time = 0

    # Process each frame
    try:
        for frame in input_channel.next_batch():
            frame_processing_start_time = time.time()

            frame_counter = frame_counter + 1
            key = cv2.waitKey(60)

            # Use face detection to find cropped face and provide face coordinates
            cropped_face, face_coords, face_inference_time = face_detection_obj.predict(
                frame, prob_threshold)
            total_face_inf_time = total_face_inf_time + face_inference_time

            #  Now use cropped face for head pose detection
            head_pose_estimate, head_inference_time = head_pose_obj.predict(
                cropped_face, prob_threshold)
            total_head_inf_time = total_head_inf_time + head_inference_time

            #  Now use cropped face for landmarks detection
            cropped_left_eye, cropped_right_eye, eyes_coords, converted_landmarks, landmarks_inference_time = landmarks_obj.predict(
                cropped_face, prob_threshold)
            total_lanmarks_inf_time = total_lanmarks_inf_time + landmarks_inference_time

            #  Finally gaze estimation
            gaze_vector, gaze_estimate_time = gaze_estimation_obj.predict(
                cropped_left_eye, cropped_right_eye, head_pose_estimate)
            total_gaze_inf_time = total_gaze_inf_time + gaze_estimate_time

            # Move the mouse
            #mouse_controller.move(gaze_vector[0], gaze_vector[1])

            # Show size-reduced frame for visual comparison

            # Check potential visualize flags: 'F', 'H', 'L', 'G'
            # If flag exist, process image to show inference results
            if args.visualize is not None:

                visualize_flag = str(args.visualize)

                # Draw bounding box around detected face
                if 'F' in visualize_flag:
                    cv2.rectangle(frame,
                                  (face_coords[0][0], face_coords[0][1]),
                                  (face_coords[0][2], face_coords[0][3]),
                                  (0, 255, 0), 2)

                # Show head pose parameters
                if 'H' in visualize_flag:
                    cv2.putText(
                        frame,
                        "Head pose: yaw: {:.3f}, pitch: {:.3f}, roll: {:.3f}".
                        format(head_pose_estimate[0], head_pose_estimate[1],
                               head_pose_estimate[2]), (10, 20),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 5)

                # Draw dots on detected facial landmarks
                if 'L' in visualize_flag:
                    cv2.circle(frame,
                               (converted_landmarks[0] + face_coords[0][0],
                                converted_landmarks[1] + face_coords[0][1]),
                               10, (0, 255, 0), 5)
                    cv2.circle(frame,
                               (converted_landmarks[2] + face_coords[0][0],
                                converted_landmarks[3] + face_coords[0][1]),
                               10, (0, 255, 0), 5)
                    cv2.circle(frame,
                               (converted_landmarks[4] + face_coords[0][0],
                                converted_landmarks[5] + face_coords[0][1]),
                               10, (0, 255, 0), 5)
                    cv2.circle(frame,
                               (converted_landmarks[6] + face_coords[0][0],
                                converted_landmarks[7] + face_coords[0][1]),
                               10, (0, 255, 0), 5)
                    cv2.circle(frame,
                               (converted_landmarks[8] + face_coords[0][0],
                                converted_landmarks[9] + face_coords[0][1]),
                               10, (0, 255, 0), 5)

                # Display gaze parameters
                if 'G' in visualize_flag:
                    cv2.putText(
                        frame,
                        "Gaze estimate: x: {:.3f}, y: {:.3f}, z: {:.3f}".
                        format(gaze_vector[0], gaze_vector[1], gaze_vector[2]),
                        (10, 100), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 5)

            resized_frame = cv2.resize(frame, (640, 360))
            cv2.imshow('frame', resized_frame)

            if frame_counter % 4 == 0:
                mouse_controller.move(gaze_vector[0], gaze_vector[1])

            frame_processing_time = frame_processing_time + (
                time.time() - frame_processing_start_time) * 1000

            if key == 27:
                break

    except Exception as e:
        #traceback.print_exc()
        if 'shape' in str(e):
            log_obj.info("Video feed finished")
        else:
            log_obj.error("[ERROR]: " + str(e))
        pass

    # All done, cleaning up
    cv2.destroyAllWindows()
    input_channel.close()

    # Print out statistics
    log_obj.info("[Info]: Video source FPS: " + str(fps))
    log_obj.info("[Info]: Total frame count: " + str(frame_counter))
    log_obj.info("")
    log_obj.info("[Info]: Face detection model loading time: {:.3f} ms".format(
        face_detection_loading_time * 1000))
    log_obj.info("[Info]: Head pose model loading time: {:.3f} ms".format(
        head_pose_loading_time * 1000))
    log_obj.info(
        "[Info]: Facial landmarks detection model loading time: {:.3f} ms".
        format(landmarks_detection_loading_time * 1000))
    log_obj.info(
        "[Info]: Gaze estimation model loading time: {:.3f} ms".format(
            gaze_estimation_loading_time * 1000))
    log_obj.info("")
    log_obj.info(
        "[Info]: Average  per frame total processing time : {:.3f} ms".format(
            frame_processing_time / frame_counter))
    log_obj.info("[Info]: Average face inferencing  time: {:.3f} ms".format(
        total_face_inf_time / frame_counter))
    log_obj.info(
        "[Info]: Average head pose  inferencing  time: {:.3f} ms".format(
            total_head_inf_time / frame_counter))
    log_obj.info(
        "[Info]: Average facial landmarks inferencing  time: {:.3f} ms".format(
            total_lanmarks_inf_time / frame_counter))
    log_obj.info("[Info]: Average gaze estimate  time: {:.3f} ms".format(
        total_gaze_inf_time / frame_counter))
Exemplo n.º 6
0
def main():

    # Command line args
    args = build_argparser().parse_args()
    previewFlags = args.previewFlags

    logger = log.getLogger()
    inputFilePath = args.input
    inputFeeder = None
    if inputFilePath.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(inputFilePath):
            logger.error("Unable to locate specified video file")
            exit(1)
        inputFeeder = InputFeeder("video", inputFilePath)

    modelPathDict = {
        'FaceDetect': args.facedetection,
        'FacialDetect': args.facialdetection,
        'GazeEstimate': args.gazeestimation,
        'HeadPoseEstimation': args.headpose
    }

    for fileNameKey in modelPathDict.keys():
        if not os.path.isfile(modelPathDict[fileNameKey]):
            logger.error("Unable to find specified " + fileNameKey +
                         " xml file")
            exit(1)

    fdm = FaceDetect(modelPathDict['FaceDetect'], args.device,
                     args.cpu_extension)
    fldm = FacialDetect(modelPathDict['FacialDetect'], args.device,
                        args.cpu_extension)
    gem = GazeEstimate(modelPathDict['GazeEstimate'], args.device,
                       args.cpu_extension)
    hpem = HeadPoseEstimation(modelPathDict['HeadPoseEstimation'], args.device,
                              args.cpu_extension)

    mc = MouseController('medium', 'fast')

    inputFeeder.load_data()
    fdm.load_model()
    fldm.load_model()
    gem.load_model()
    hpem.load_model()

    frame_count = 0
    for ret, frame in inputFeeder.next_batch():
        if not ret:
            break
        frame_count += 1
        if frame_count % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))

        key = cv2.waitKey(60)
        croppedFace, face_coords = fdm.predict(frame.copy(),
                                               args.prob_threshold)
        if type(croppedFace) == int:
            logger.error("Face not detected.")
            if key == 27:
                break
            continue

        hp_out = hpem.predict(croppedFace.copy())

        left_eye, right_eye, eye_coords = fldm.predict(croppedFace.copy())

        new_mouse_coord, gaze_vector = gem.predict(left_eye, right_eye, hp_out)

        if (not len(previewFlags) == 0):
            preview_frame = frame.copy()
            if 'fd' in previewFlags:
                preview_frame = croppedFace
            if 'fl' in previewFlags:
                cv2.rectangle(croppedFace,
                              (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                              (0, 255, 0), 3)
                cv2.rectangle(croppedFace,
                              (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                              (0, 255, 0), 3)
            if 'hp' in previewFlags:
                cv2.putText(
                    preview_frame,
                    "Pose Angles: yaw:{:.2f} | Pitch:{:.2f} | Roll:{:.2f}".
                    format(hp_out[0], hp_out[1], hp_out[2]), (10, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (239, 174, 0), 2)
            if 'ge' in previewFlags:
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] *
                                                        12), 160
                le = cv2.line(left_eye.copy(), (x - w, y - w), (x + w, y + w),
                              (255, 0, 255), 2)
                cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                re = cv2.line(right_eye.copy(), (x - w, y - w), (x + w, y + w),
                              (255, 0, 255), 2)
                cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                croppedFace[eye_coords[0][1]:eye_coords[0][3],
                            eye_coords[0][0]:eye_coords[0][2]] = le
                croppedFace[eye_coords[1][1]:eye_coords[1][3],
                            eye_coords[1][0]:eye_coords[1][2]] = re

        cv2.imshow("visualization", cv2.resize(preview_frame, (500, 500)))

        if frame_count % 5 == 0:
            mc.move(new_mouse_coord[0], new_mouse_coord[1])
        if key == 27:
            break

    logger.error("VideoStreaming Ended")
    cv2.destroyAllWindows()
    inputFeeder.close()
Exemplo n.º 7
0
def main():

    # Get arguments
    args = build_argparser().parse_args()
    cursor = MouseController('medium', 'fast')
    # Set logger and error messages
    logs = logging.getLogger()
    if args.input.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(args.input):
            logs.error("Unable to find specified video file")
        inputFeeder = InputFeeder("video", args.input)

    if not os.path.isfile(args.face_detection_model):
        logs.error("Error: face detection model's xml file not found")
    if not os.path.isfile(args.facial_landmark_model):
        logs.error("Error: facial landmark model's xml file not found")
    if not os.path.isfile(args.gaze_estimation_model):
        logs.error("Error: gaze estimation model's xml file not found")
    if not os.path.isfile(args.head_pose_model):
        logs.error("Error: head pose model's xml file not found")

    # Load and check the model
    Landmark = Model_LandmarkDetection(args.facial_landmark_model, args.device,
                                       args.cpu_extension)
    Landmark.check_model()
    fdm = Model_FaceDetection(args.face_detection_model, args.device,
                              args.cpu_extension)
    fdm.check_model()
    hpem = Model_PoseEstimation(args.head_pose_model, args.device,
                                args.cpu_extension)
    hpem.check_model()
    gem = Model_GazeEstimation(args.gaze_estimation_model, args.device,
                               args.cpu_extension)
    gem.check_model()
    load_data = [
        inputFeeder.load_data(),
        Landmark.load_model(),
        fdm.load_model(),
        hpem.load_model(),
        gem.load_model()
    ]
    load_data

    # Processes
    f_count = 0
    for ret, frame in inputFeeder.next_batch():
        if not ret:
            break
        f_count = f_count + 1
        show_video = cv2.imshow('Video', cv2.resize(frame, (500, 500)))
        if f_count % 5 == 0: show_video
        prob_thr = args.prob_threshold
        key = cv2.waitKey(60)
        frame_a = frame.copy()
        pred = fdm.predict(frame_a, prob_thr)
        cropped, co_ords = pred
        typ = type(cropped)
        if typ == int:
            logs.error("Face not detected")
            if key == 27: break
            continue
        crop_a = cropped.copy()
        pose = hpem.predict(crop_a)
        left_eye, right_eye, box = Landmark.predict(cropped.copy())
        cursor_co, gaze_vector = gem.predict(left_eye, right_eye, pose)
        x, y = box[0][0] - 10, box[0][1] - 10
        x1, y1 = box[0][2] + 10, box[0][3] + 10
        x2, y2 = box[1][0] - 10, box[1][1] - 10
        x3, y3 = box[1][2] + 10, box[1][3] + 10
        color1 = (255, 255, 255)
        color2 = (237, 48, 202)
        text_pos, text_pos2 = (10, 50), (10, 100)
        fontScale = 0.6
        font = 1
        fontColor = (255, 255, 255)
        lineType = 1
        if (len(args.preview_flags) != 0):
            preview_frame = frame.copy()
            if 'ldm' in args.preview_flags:
                cv2.rectangle(cropped, (x, y), (x1, y1), color1, 2)
                cv2.rectangle(cropped, (x2, y2), (x3, y3), color1, 2)

            if 'fdm' in args.preview_flags:
                cv2.rectangle(preview_frame, (co_ords[0], co_ords[1]),
                              (co_ords[2], co_ords[3]), (255, 0, 0), 3)
                preview_frame = cropped
            if 'gem' in args.preview_flags:
                x = int(gaze_vector[0] * 12)
                y = int(gaze_vector[1] * 12)
                w = 160
                le = left_eye.copy()
                re = right_eye.copy()
                thick = 2
                start_a, end_a = (x - w, y - w), (x + w, y + w)
                start_b, end_b = (x - w, y + w), (x + w, y - w)
                cv2.line(left_eye, start_b, end_b, color2, thick)
                cv2.line(right_eye, start_b, end_b, color2, thick)
                left = cv2.line(le, start_a, end_a, color2, thick)
                right = cv2.line(re, start_a, end_a, color2, thick)
                cv2.line(re, start_a, end_a, color2, thick)
                a1, b1, c1, d1 = box[0][0], box[0][1], box[0][2], box[0][3]
                a2, b2, c2, d2 = box[1][0], box[1][1], box[1][2], box[1][3]
                cropped[b1:d1, a1:c1], cropped[b2:d2, a2:c2] = left, right

            if 'hpm' in args.preview_flags:
                cv2.putText(
                    preview_frame,
                    """Angles: Roll= {:.1f} , Pitch= {:.1f} , Yaw= {:.1f} """.
                    format(pose[2], pose[1], pose[0]), text_pos, font,
                    fontScale, fontColor, lineType)

            show_video = cv2.imshow("", cv2.resize(preview_frame, (500, 500)))
            show_video
        if f_count % 5 == 0: cursor.move(cursor_co[0], cursor_co[1])
        if key == 27: break
    # Ends program
    end_msg, end_feed, end_video = logs.error(
        "Video ended."), inputFeeder.close(), cv2.destroyAllWindows()
    end_msg
    end_feed
    end_video
def main():
    args = build_argparser().parse_args()

    log.debug(args)

    # Load face detection model
    faceDetection = ModelFaceDetection(args.face_detection_model,
                                       args.prob_threshold, args.device,
                                       args.cpu_extension)
    start_model_load_time = time.time()
    faceDetection.load_model()
    facedetection_model_load_time = time.time() - start_model_load_time

    log.debug('Facedetection model load time. {}'.format(
        facedetection_model_load_time))

    #Load Head pose estimation model
    headPoseEstimation = ModelHeadPoseEstimation(
        args.headpose_estimation_model, args.prob_threshold, args.device,
        args.cpu_extension)
    start_model_load_time = time.time()
    headPoseEstimation.load_model()
    headposeestimation_model_load_time = time.time() - start_model_load_time

    log.debug('Head pose estimation model load time. {}'.format(
        headposeestimation_model_load_time))

    #Facial landmark model
    facialLandmarkDetection = ModelFacialLandmarkDetection(
        args.landmarks_regression_model, args.prob_threshold, args.device,
        args.cpu_extension)
    start_model_load_time = time.time()
    facialLandmarkDetection.load_model()
    facialLandmarkDetection_model_load_time = time.time(
    ) - start_model_load_time

    log.debug('Facial landmarks detection model load time. {}'.format(
        facialLandmarkDetection_model_load_time))

    #Gaze estimation model
    gazeEstimation = ModelGazeEstimation(args.gaze_estimation_model,
                                         args.prob_threshold, args.device,
                                         args.cpu_extension)
    start_model_load_time = time.time()
    gazeEstimation.load_model()
    gazeEstimation_model_load_time = time.time() - start_model_load_time

    log.debug('Gaze estimation model load time. {}'.format(
        gazeEstimation_model_load_time))

    # Feeder
    feeder = InputFeeder(args.input)
    feeder.load_data()

    counter = 0
    window_name = 'frame'

    facedetection_inference_time_sum = 0
    headpose_inference_time_sum = 0
    faciallandmark_inference_time_sum = 0
    gazeestimation_inference_time_sum = 0

    #Process Framea
    for frame in feeder.next_batch():
        if frame is None:
            break

        key_pressed = cv2.waitKey(1)
        if key_pressed == 27:
            break

        #Face detection
        start_inference_time = time.time()
        face_image, face_coords = faceDetection.predict(frame)
        facedetection_inference_time = time.time() - start_inference_time
        facedetection_inference_time_sum += facedetection_inference_time

        #Head pose estimation
        start_inference_time = time.time()
        yaw, pitch, roll = headPoseEstimation.predict(face_image)
        headpose_inference_time = time.time() - start_inference_time
        headpose_inference_time_sum += headpose_inference_time

        # log.debug('Head pose yaw, pirch ,roll {}, {}, {}'.format(yaw, pitch, roll))

        #Facial landmarks detection
        start_inference_time = time.time()
        left_eye_image, right_eye_image, eye_coords = facialLandmarkDetection.predict(
            face_image)
        faciallandmark_inference_time = time.time() - start_inference_time
        faciallandmark_inference_time_sum += faciallandmark_inference_time

        # cv2.imwrite('left_eye.png', left_eye_image)
        # cv2.imwrite('right_eye.png', right_eye_image)
        # cv2.imwrite('face.png', face_image)

        #Gaze estimation
        start_inference_time = time.time()
        gaze_vector = gazeEstimation.predict(left_eye_image, right_eye_image,
                                             [yaw, pitch, roll])
        gazeestimation_inference_time = time.time() - start_inference_time
        gazeestimation_inference_time_sum += gazeestimation_inference_time

        #log.debug('Gaze Vector {}, {}'.format(gaze_vector[0], gaze_vector[1]))

        #Mouse
        if (counter % 2 == 0):
            mouse = MouseController('high', 'fast')
            mouse.move(gaze_vector[0], gaze_vector[1])

        #Display frame
        if (args.show):
            font = cv2.FONT_HERSHEY_SIMPLEX

            if 0 < len(face_coords):
                #face rect
                fxmin = face_coords[0][0]
                fymin = face_coords[0][1]
                fxmax = face_coords[0][2]
                fymax = face_coords[0][3]

                cv2.rectangle(frame, (fxmin, fymin), (fxmax, fymax),
                              (200, 0, 0), 2)

                #eye rect
                cv2.rectangle(
                    frame,
                    (fxmin + eye_coords[0][0], fymin + eye_coords[0][1]),
                    (fxmin + eye_coords[0][2], fymin + eye_coords[0][3]),
                    (0, 200, 0), 2)
                cv2.rectangle(
                    frame,
                    (fxmin + eye_coords[1][0], fymin + eye_coords[1][1]),
                    (fxmin + eye_coords[1][2], fymin + eye_coords[1][3]),
                    (0, 200, 0), 2)

                #Face position
                length = 100
                yaw = math.radians(yaw)
                pitch = math.radians(-pitch)
                roll = math.radians(roll)
                x1 = int(length * (math.cos(yaw) * math.cos(roll)))
                y1 = int(length *
                         (math.cos(pitch) * math.sin(roll) +
                          math.cos(roll) * math.sin(pitch) * math.sin(yaw)))

                x2 = int(length * (-math.cos(yaw) * math.sin(roll)))
                y2 = int(length *
                         (math.cos(pitch) * math.cos(roll) +
                          math.sin(pitch) * math.sin(yaw) * math.sin(roll)))

                x3 = int(length * (math.sin(yaw)))
                y3 = int(length * (-math.cos(yaw) * math.sin(pitch)))

                cv2.line(frame, (fxmin, fymin), (fxmin + x1, fymin + y1),
                         (0, 255, 0), 2)
                cv2.line(frame, (fxmin, fymin), (fxmin + x2, fymin + y2),
                         (255, 0, 0), 2)
                cv2.line(frame, (fxmin, fymin), (fxmin + x3, fymin + y3),
                         (0, 0, 255), 2)

                #gaze
                x = int(length * gaze_vector[0])
                y = -int(length * gaze_vector[1])

                cv2.line(frame, (fxmax, fymax), (fxmax + x, fymax + y),
                         (0, 255, 255), 5)

            else:
                cv2.putText(frame, 'Face not detected', (10, 10), font, 1,
                            (255, 255, 255), 1)

            cv2.imshow(
                window_name,
                cv2.resize(frame,
                           (int(frame.shape[1] / 3), int(frame.shape[0] / 3))))

        counter += 1

    log.debug("Face detection inference time average {}".format(
        facedetection_inference_time_sum / counter))
    log.debug("Headpose inference time average  {}".format(
        headpose_inference_time_sum / counter))
    log.debug("Faciallandmark inference time average {}".format(
        faciallandmark_inference_time_sum / counter))
    log.debug("Gazeestimation inference time average {}".format(
        gazeestimation_inference_time_sum / counter))

    if (args.show):
        cv2.destroyWindow(window_name)
def infer_on_stream(args, logger):
    visualizers = args.visualize
    video_file = args.input
    input_feeder = None

    if video_file.lower() == "cam":
        input_feeder = InputFeeder("cam")
    else:
        try:
            input_feeder = InputFeeder("video", video_file)
        except FileNotFoundError:
            logger.error("Unable to find specified video file")
            exiit(1)
    input_feeder.load_data()
    mouse = MouseController('medium', 'fast')
    #load models
    fdm, fldm, gem, hpem, face_detect_laoding_time, facial_detect_laoding_time, head_pose_estimation_laoding_time, gaze_estimation_laoding_time, total_loading_time, status = load_models(
        args, logger)
    if status != 0:
        #if any model is not loaded
        exit(1)
    frame_count = 0
    #start time of the inferencing
    start_inf_time = time.time()
    #iterate till the break key is pressed
    for flag, frame in input_feeder.next_batch():
        if not flag:
            break
        frame_count += 1
        if frame_count % 3 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))

        key = cv2.waitKey(60)
        cropped_image, face_coords = fdm.predict(frame.copy(),
                                                 args.prob_threshold)
        if type(cropped_image) == int:
            logger.error("Unable to detect the face.")
            if key == 27:
                break
            continue

        hp_out = hpem.predict(cropped_image.copy())
        left_eye, right_eye, eye_coords = fldm.predict(cropped_image.copy())
        mouse_coord, vector = gem.predict(left_eye, right_eye, hp_out)

        if (not len(visualizers) == 0):
            preview_frame = frame.copy()
            switches = {"fd": 0, "fld": 1, "hp": 2, "ge": 3}
            for i in visualizers:
                val = switches.get(i)
                if val == 0:
                    logger.error("Visualising: Face")
                    face_detect_visualize(preview_frame, face_coords)
                if val == 1:
                    logger.error("Visualising: Facial Landmarks")
                    facial_landmarks_visualize(preview_frame, cropped_image,
                                               eye_coords, face_coords)
                if val == 2:
                    logger.error("Visualising: Head Pose")
                    headpose_visualize(preview_frame, hp_out, face_coords)
                if val == 3:
                    logger.error("Visualising: Gaze")
                    x = vector[0]
                    y = vector[1]
                    gaze_visualize(preview_frame, cropped_image, x, y,
                                   left_eye, right_eye, eye_coords,
                                   face_coords)
            cv2.imshow("visualization", cv2.resize(preview_frame, (500, 500)))

        if frame_count % 3 == 0:
            mouse.move(mouse_coord[0], mouse_coord[1])

        if key == 27:
            #if benchmarking is enabled
            if (args.benchmark == "true"):
                logger.error("Face Detection Model Loading Time: {}s".format(
                    face_detect_laoding_time))
                logger.error(
                    "Facial Landmarks Detection Model Loading Time: {}s".
                    format(facial_detect_laoding_time))
                logger.error(
                    "Head Pose Estimation Model Loading Time: {}s".format(
                        head_pose_estimation_laoding_time))
                logger.error("Gaze Estimation Model Loading Time: {}s".format(
                    gaze_estimation_laoding_time))
                logger.error(
                    "Total Loading Time: {}s".format(total_loading_time))
                inference_time = round(time.time() - start_inf_time, 1)
                fps = int(frame_count) / inference_time
                logger.error(
                    "total inference time {} seconds".format(inference_time))
                logger.error("fps {} frame/second".format(fps))
                with open(
                        os.path.join(
                            os.path.dirname(os.path.abspath(__file__)),
                            'benchmark.txt'), 'w') as f:
                    f.write(
                        str("Total Inference Time: " + str(inference_time) +
                            '\n'))
                    f.write(str("Total FPS: " + str(fps) + '\n'))
                    f.write(
                        str("Total Model Loading Time: " +
                            str(total_loading_time) + '\n'))
            break
    logger.error("VideoStream ended...")
    cv2.destroyAllWindows()
    input_feeder.close()
Exemplo n.º 10
0
def run_app(args):
    face_detection_model = Model_Face_Detection(args.model_path_fd,
                                                args.device,
                                                args.cpu_extension,
                                                threshold=args.threshold)
    face_detection_model.load_model()
    head_pose_model = Model_Head_Pose_Estimation(args.model_path_hp,
                                                 args.device,
                                                 args.cpu_extension)
    head_pose_model.load_model()
    face_landmark_model = Model_Facial_Landmarks(args.model_path_fl,
                                                 args.device,
                                                 args.cpu_extension)
    face_landmark_model.load_model()
    gaze_model = Model_Gaze_Estimation(args.model_path_ge, args.device,
                                       args.cpu_extension)
    gaze_model.load_model()

    input_feeder = InputFeeder(
        args.input_type,
        args.input_file,
    )
    input_feeder.load_data()
    mouse_controller = MouseController("medium", "fast")
    # while input_feeder.cap.isOpened():
    # feed_out=input_feeder.next_batch()

    frame_count = 0
    custom = args.toggle

    for frame in input_feeder.next_batch():

        if frame is None:
            break
        key_pressed = cv2.waitKey(60)
        frame_count += 1
        face_out, cords = face_detection_model.predict(frame.copy())

        # When no face was detected
        if cords == 0:
            inf_info = "No Face Detected in the Frame"
            write_text_img(frame, inf_info, 400)
            continue

        eyes_cords, left_eye, right_eye = face_landmark_model.predict(
            face_out.copy())
        head_pose_out = head_pose_model.predict(face_out.copy())
        gaze_out = gaze_model.predict(left_eye, right_eye, head_pose_out)

        # Faliure in processing both eyes
        if gaze_out is None:
            continue
        x, y = gaze_out
        if frame_count % 5 == 0:
            mouse_controller.move(x, y)
        inf_info = "Head Pose (y: {:.2f}, p: {:.2f}, r: {:.2f})".format(
            head_pose_out[0], head_pose_out[1], head_pose_out[2])
        # Process Visualization
        if 'frame' in custom:
            visualization(frame, cords, face_out, eyes_cords)

        if 'stats' in custom:
            write_text_img(face_out, inf_info, 400)
            inf_info = "Gaze Angle: x: {:.2f}, y: {:.2f}".format(x, y)
            log.info("Statistic " + inf_info)
            write_text_img(face_out, inf_info, 400, 15)
        if 'gaze' in custom:
            display_head_pose(frame, head_pose_out, cords)

        out_f = np.hstack(
            (cv2.resize(frame, (400, 400)), cv2.resize(face_out, (400, 400))))
        cv2.imshow('Visualization', out_f)
        if key_pressed == 27:
            break
    input_feeder.close()
    cv2.destroyAllWindows()
Exemplo n.º 11
0
def main():
    args = build_argparser().parse_args()
    logger = logging.getLogger('main')

    is_benchmarking = False
    # initialize variables with the input arguments for easy access
    model_path_dict = {
        'FaceDetectionModel': args.faceDetectionModel,
        'LandmarkRegressionModel': args.landmarkRegressionModel,
        'HeadPoseEstimationModel': args.headPoseEstimationModel,
        'GazeEstimationModel': args.gazeEstimationModel
    }
    preview_flags = args.previewFlags
    input_filename = args.input
    device_name = args.device
    prob_threshold = args.prob_threshold
    output_path = args.output_path

    if input_filename.lower() == 'cam':
        feeder = InputFeeder(input_type='cam')
    else:
        if not os.path.isfile(input_filename):
            logger.error("Unable to find specified video file")
            exit(1)
        feeder = InputFeeder(input_type='video', input_file=input_filename)

    for model_path in list(model_path_dict.values()):
        if not os.path.isfile(model_path):
            logger.error("Unable to find specified model file" + str(model_path))
            exit(1)

    # instantiate model
    face_detection_model = FaceDetectionModel(model_path_dict['FaceDetectionModel'], device_name, threshold=prob_threshold)
    landmark_detection_model = LandmarkDetectionModel(model_path_dict['LandmarkRegressionModel'], device_name, threshold=prob_threshold)
    head_pose_estimation_model = HeadPoseEstimationModel(model_path_dict['HeadPoseEstimationModel'], device_name, threshold=prob_threshold)
    gaze_estimation_model = GazeEstimationModel(model_path_dict['GazeEstimationModel'], device_name, threshold=prob_threshold)

    if not is_benchmarking:
        mouse_controller = MouseController('medium', 'fast')

    # load Models
    start_model_load_time = time.time()
    face_detection_model.load_model()
    landmark_detection_model.load_model()
    head_pose_estimation_model.load_model()
    gaze_estimation_model.load_model()
    total_model_load_time = time.time() - start_model_load_time

    feeder.load_data()

    out_video = cv2.VideoWriter(os.path.join('output_video.mp4'), cv2.VideoWriter_fourcc(*'avc1'), int(feeder.get_fps()/10),
                                (1920, 1080), True)

    frame_count = 0
    start_inference_time = time.time()
    for ret, frame in feeder.next_batch():

        if not ret:
            break

        frame_count += 1

        key = cv2.waitKey(60)

        try:
            face_cords, cropped_image = face_detection_model.predict(frame)

            if type(cropped_image) == int:
                logger.warning("Unable to detect the face")
                if key == 27:
                    break
                continue

            left_eye_image, right_eye_image, eye_cords = landmark_detection_model.predict(cropped_image)
            pose_output = head_pose_estimation_model.predict(cropped_image)
            mouse_cord, gaze_vector = gaze_estimation_model.predict(left_eye_image, right_eye_image, pose_output)

        except Exception as e:
            logger.warning("Could predict using model" + str(e) + " for frame " + str(frame_count))
            continue

        image = cv2.resize(frame, (500, 500))

        if not len(preview_flags) == 0:
            preview_frame = draw_preview(
                frame, preview_flags, cropped_image, left_eye_image, right_eye_image,
                face_cords, eye_cords, pose_output, gaze_vector)
            image = np.hstack((cv2.resize(frame, (500, 500)), cv2.resize(preview_frame, (500, 500))))

        cv2.imshow('preview', image)
        out_video.write(frame)

        if frame_count % 5 == 0 and not is_benchmarking:
            mouse_controller.move(mouse_cord[0], mouse_cord[1])

        if key == 27:
            break

    total_time = time.time() - start_inference_time
    total_inference_time = round(total_time, 1)
    fps = frame_count / total_inference_time

    try:
        os.mkdir(output_path)
    except OSError as error:
        logger.error(error)

    with open(output_path+'stats.txt', 'w') as f:
        f.write(str(total_inference_time) + '\n')
        f.write(str(fps) + '\n')
        f.write(str(total_model_load_time) + '\n')

    logger.info('Model load time: ' + str(total_model_load_time))
    logger.info('Inference time: ' + str(total_inference_time))
    logger.info('FPS: ' + str(fps))

    logger.info('Video stream ended')
    cv2.destroyAllWindows()
    feeder.close()
Exemplo n.º 12
0
def main(args):
    logger = logging.getLogger()

    feeder = None
    if args.input_type == constants.VIDEO or args.input_type == constants.IMAGE:
        extension = str(args.input).split('.')[1]
        # if not extension.lower() in constants.ALLOWED_EXTENSIONS:
        #     logger.error('Please provide supported extension.' + str(constants.ALLOWED_EXTENSIONS))
        #     exit(1)

        # if not os.path.isfile(args.input):
        #     logger.error("Unable to find specified video/image file")
        #     exit(1)

        feeder = InputFeeder(args.input_type, args.input)
    elif args.input_type == constants.IP_CAMERA:
        if not str(args.input).startswith('http://'):
            logger.error('Please provide ip of server with http://')
            exit(1)

        feeder = InputFeeder(args.input_type, args.input)
    elif args.input_type == constants.WEBCAM:
        feeder = InputFeeder(args.input_type)

    mc = MouseController("medium", "fast")

    feeder.load_data()

    face_model = Face_Model(args.face, args.device, args.cpu_extension)
    face_model.check_model()

    landmark_model = Landmark_Model(args.landmarks, args.device,
                                    args.cpu_extension)
    landmark_model.check_model()

    gaze_model = Gaze_Estimation_Model(args.gazeestimation, args.device,
                                       args.cpu_extension)
    gaze_model.check_model()

    head_model = Head_Pose_Model(args.headpose, args.device,
                                 args.cpu_extension)
    head_model.check_model()

    face_model.load_model()
    logger.info("Face Detection Model Loaded...")
    landmark_model.load_model()
    logger.info("Landmark Detection Model Loaded...")
    gaze_model.load_model()
    logger.info("Gaze Estimation Model Loaded...")
    head_model.load_model()
    logger.info("Head Pose Detection Model Loaded...")
    print('Loaded')

    try:
        frame_count = 0
        for ret, frame in feeder.next_batch():
            if not ret:
                break

            if frame is None:
                continue

            frame_count += 1
            crop_face = None
            if True:

                crop_face, box = face_model.predict(frame.copy())

                if crop_face is None:
                    logger.error("Unable to detect the face.")
                    continue
                imshow('frame', crop_face, width=400)

                (lefteye_x, lefteye_y), (
                    righteye_x, righteye_y
                ), eye_coords, left_eye, right_eye = landmark_model.predict(
                    crop_face.copy(), eye_surrounding_area=15)

                # imshow("left_eye", left_eye, width=100)
                # imshow("right_eye", right_eye, width=100)
                '''TODO dlib is better to crop eye with perfection'''

                head_position = head_model.predict(crop_face.copy())

                if True:
                    if cv2.waitKey(20) & 0xFF == ord('q'):
                        break
                    continue

                gaze, (mousex,
                       mousey) = gaze_model.predict(left_eye.copy(),
                                                    right_eye.copy(),
                                                    head_position)

                if (len(args.debug) > 0):
                    debuFrame = frame.copy()
                    if crop_face is None:
                        continue

                    thickness = 2
                    radius = 2
                    color = (0, 0, 255)
                    [[le_xmin, le_ymin, le_xmax, le_ymax],
                     [re_xmin, re_ymin, re_xmax, re_ymax]] = eye_coords

                    if 'face' in args.debug:
                        cv2.rectangle(debuFrame, (box[0], box[1]),
                                      (box[2], box[3]), (255, 255, 255), 2)

                        cv2.rectangle(crop_face, (re_xmin, re_ymin),
                                      (re_xmax, re_ymax), (100, 255, 100), 2)
                        cv2.rectangle(crop_face, (le_xmin, le_ymin),
                                      (le_xmax, le_ymax), (100, 255, 100), 2)
                    '''
                    LandMark
                    '''

                    cv2.circle(crop_face, (lefteye_x, lefteye_y), radius,
                               color, thickness)
                    cv2.circle(crop_face, (righteye_x, righteye_y), radius,
                               color, thickness)

                    debuFrame[box[1]:box[3], box[0]:box[2]] = crop_face

                    if 'headpose' in args.debug:
                        yaw = head_position[0]
                        pitch = head_position[1]
                        roll = head_position[2]

                        sinY = math.sin(yaw * math.pi / 180.0)
                        sinP = math.sin(pitch * math.pi / 180.0)
                        sinR = math.sin(roll * math.pi / 180.0)

                        cosY = math.cos(yaw * math.pi / 180.0)
                        cosP = math.cos(pitch * math.pi / 180.0)
                        cosR = math.cos(roll * math.pi / 180.0)

                        cH, cW = crop_face.shape[:2]
                        arrowLength = 0.4 * cH * cW

                        xCenter = int(cW / 2)
                        yCenter = int(cH / 2)

                        # center to right
                        # cv2.line(crop_face, (xCenter, yCenter),
                        #          (int((xCenter + arrowLength * (cosR * cosY + sinY * sinP * sinR))),
                        #           int((yCenter + arrowLength * cosP * sinR))), (186, 204, 2), 1)
                        #
                        #             # center to top
                        #             cv2.line(crop_face, (xCenter, yCenter),
                        #                      (int(((xCenter + arrowLength * (cosR * sinY * sinP + cosY * sinR)))),
                        #                       int((yCenter - arrowLength * cosP * cosR))), (186, 204, 2), 1)
                        #
                        #             # center to forward
                        #             cv2.line(crop_face, (xCenter, yCenter),
                        #                      (int(((xCenter + arrowLength * sinY * cosP))),
                        #                       int((yCenter + arrowLength * sinP))), (186, 204, 2), 1)
                        #
                        cv2.putText(
                            crop_face,
                            'head pose: (y={:.2f}, p={:.2f}, r={:.2f})'.format(
                                yaw, pitch, roll), (0, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1)

                    if 'gaze' in args.debug:
                        cH, cW = crop_face.shape[:2]
                        arrowLength = 0.6 * cH

                        gazeArrowX = gaze[0] * arrowLength
                        gazeArrowY = -gaze[1] * arrowLength

                        debuFrame[box[1]:box[3], box[0]:box[2]] = crop_face

                        cv2.arrowedLine(crop_face, (lefteye_x, lefteye_y),
                                        (int(lefteye_x + gazeArrowX),
                                         int(lefteye_y + gazeArrowY)),
                                        (184, 113, 57), 2)
                        cv2.arrowedLine(crop_face, (righteye_x, righteye_y),
                                        (int(righteye_x + gazeArrowX),
                                         int(righteye_y + gazeArrowY)),
                                        (184, 113, 57), 2)

                        cv2.putText(crop_face,
                                    'gaze angles: h={}, v={}'.format("!", "2"),
                                    (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                    (255, 255, 255), 1)

                        debuFrame[box[1]:box[3], box[0]:box[2]] = crop_face

            #
            #             imshow("face", crop_face, width=400)
            #             cv2.moveWindow("face", 0, 0)
            #             imshow("debug", debuFrame, width=400)
            #             cv2.moveWindow("debug", cW * 2, cH)

            # try:
            #     if frame_count % 5 == 0:
            #         mc.move(mousex, mousey)
            # except Exception as err:
            #     logger.error("Moving cursor outside the PC not supported yet !!")

            # key = cv2.waitKey(60)
                    imshow('frame', debuFrame, width=1210)

            if cv2.waitKey(20) & 0xFF == ord('q'):
                break
    except Exception as err:
        logger.error(err)

    cv2.destroyAllWindows()
    feeder.close()
def main():

    try:
        args = build_argparser().parse_args()

        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            handlers=[
                logging.FileHandler("computer-pointer-controller.log"),
                logging.StreamHandler()
            ])

        print_output_frame = args.print_output_frame

        logger = logging.getLogger()

        input_file_path = args.input
        feeder = None

        if input_file_path.lower() == "CAM":
            feeder = InputFeeder("cam")
        else:
            if not os.path.isfile(input_file_path):
                logger.error("Unable to find specified video file")
                exit(1)
            feeder = InputFeeder("video", input_file_path)

        mc = MouseController('low', 'fast')
        feeder.load_data()

        modelPathDict = {
            'FaceDetectionModel': args.face,
            'FacialLandmarksDetectionModel': args.landmark,
            'GazeEstimationModel': args.gazeestimation,
            'HeadPoseEstimationModel': args.headpose
        }

        for fileNameKey in modelPathDict.keys():
            if not os.path.isfile(modelPathDict[fileNameKey] + '.xml'):
                logger.error("Unable to find specified " + fileNameKey +
                             " xml file")
                exit(1)

        logging.info("============== Models Load time ===============")
        face_detection = FaceDetection(args.face, args.device,
                                       args.prob_threshold, args.cpu_extension)
        start_time = time.time()
        face_detection.load_model()
        logging.info("Face Detection Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))

        landmarks_detection = FacialLandmarksDetection(args.landmark,
                                                       args.device,
                                                       args.cpu_extension)
        start_time = time.time()
        landmarks_detection.load_model()
        logging.info("Facial Landmarks Detection Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))

        gaze_estimation = GazeEstimation(args.gazeestimation, args.device,
                                         args.cpu_extension)
        start_time = time.time()
        gaze_estimation.load_model()
        logging.info("Gaze Estimation Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))

        headpose_estimation = HeadPoseEstimation(args.headpose, args.device,
                                                 args.cpu_extension)
        start_time = time.time()
        headpose_estimation.load_model()
        logging.info("Headpose Estimation Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))
        logging.info("==============  End =====================")

        frame_count = 0
        fd_infertime = 0
        lm_infertime = 0
        hp_infertime = 0
        ge_infertime = 0

        for ret, frame in feeder.next_batch():
            if not ret:
                break
            frame_count += 1
            key = cv2.waitKey(60)

            start_time = time.time()
            cropped_face, face_coords = face_detection.predict(frame.copy())
            fd_infertime += time.time() - start_time

            if len(cropped_face) == 0:
                logger.error("Unable to detect the face.")
                continue

            start_time = time.time()
            headpose_out = headpose_estimation.predict(cropped_face.copy())
            hp_infertime += time.time() - start_time

            start_time = time.time()
            left_eye, right_eye, eye_coords = landmarks_detection.predict(
                cropped_face.copy())
            lm_infertime += time.time() - start_time

            start_time = time.time()
            new_mouse_coord, gaze_vector = gaze_estimation.predict(
                left_eye, right_eye, headpose_out)
            ge_infertime += time.time() - start_time

            if print_output_frame:
                preview_frame = frame.copy()
                if 'fd' in print_output_frame:
                    preview_frame = cropped_face
                    cv2.rectangle(frame, (face_coords[0], face_coords[1]),
                                  (face_coords[2], face_coords[3]),
                                  (255, 0, 0), 3)

                if 'fl' in print_output_frame:
                    cv2.rectangle(cropped_face,
                                  (eye_coords[0][0], eye_coords[0][1]),
                                  (eye_coords[0][2], eye_coords[0][3]),
                                  (0, 255, 0), 2)
                    cv2.rectangle(cropped_face,
                                  (eye_coords[1][0], eye_coords[1][1]),
                                  (eye_coords[1][2], eye_coords[1][3]),
                                  (0, 255, 0), 2)

                if 'hp' in print_output_frame:
                    cv2.putText(
                        cropped_face,
                        "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                        format(headpose_out[0], headpose_out[1],
                               headpose_out[2]), (0, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.25, (0, 0, 0), 1)

                    face = frame[face_coords[1]:face_coords[3],
                                 face_coords[0]:face_coords[2]]
                    xmin, ymin, _, _ = face_coords
                    face_center = (xmin + face.shape[1] / 2,
                                   ymin + face.shape[0] / 2, 0)
                    headpose_estimation.draw_axes(frame, face_center,
                                                  headpose_out[0],
                                                  headpose_out[1],
                                                  headpose_out[2])

                if 'ge' in print_output_frame:

                    cropped_h, cropped_w = cropped_face.shape[:2]
                    arrow_length = 0.3 * cropped_h

                    gaze_arrow_x = gaze_vector[0] * arrow_length
                    gaze_arrow_y = -gaze_vector[1] * arrow_length

                    cv2.arrowedLine(cropped_face,
                                    (eye_coords[0][0], eye_coords[0][1]),
                                    (int(eye_coords[0][2] + gaze_arrow_x),
                                     int(eye_coords[0][3] + gaze_arrow_y)),
                                    (0, 255, 0), 2)
                    cv2.arrowedLine(cropped_face,
                                    (eye_coords[1][0], eye_coords[1][1]),
                                    (int(eye_coords[1][2] + gaze_arrow_x),
                                     int(eye_coords[1][3] + gaze_arrow_y)),
                                    (0, 255, 0), 2)

                    #frame[face_coords[1]:face_coords[3], face_coords[0]:face_coords[2]] = cropped_face

                if len(preview_frame) != 0:
                    img_hor = np.hstack((cv2.resize(preview_frame, (800, 800)),
                                         cv2.resize(frame, (800, 800))))
                else:
                    img_hor = cv2.resize(frame, (800, 800))

                cv2.imshow("Monitor", img_hor)

            if frame_count % 5 == 0:
                mc.move(new_mouse_coord[0], new_mouse_coord[1])

            if key == 27:
                break

        #logging inference times
        if (frame_count > 0):
            logging.info(
                "============== Models Inference time ===============")
            logging.info("Face Detection:{:.1f}ms".format(1000 * fd_infertime /
                                                          frame_count))
            logging.info("Facial Landmarks Detection:{:.1f}ms".format(
                1000 * lm_infertime / frame_count))
            logging.info("Headpose Estimation:{:.1f}ms".format(
                1000 * hp_infertime / frame_count))
            logging.info("Gaze Estimation:{:.1f}ms".format(
                1000 * ge_infertime / frame_count))
            logging.info("============== End ===============================")

        logger.info("Video stream ended...")
        cv2.destroyAllWindows()
        feeder.close()

    except Exception as ex:
        logging.exception("Error in inference")
        logging.exception("Exception type:")
        logging.exception(type(ex))
        logging.exception("Exception args:")
        logging.exception(ex.args)
        logging.exception("Exception:")
        logging.exception(ex)
Exemplo n.º 14
0
def main(args):

    # Multiple Modes of Control
    ###########################
    ##  0 = No Control
    ##  1 = Gaze Angle Control
    ##  2 = Head Pose Control
    ##  3 = Sound Control
    ###########################
    controlMode = 0
    modes = ['No Control', 'Gaze Control', 'Head Pose', 'Sound Control']

    ####################
    # Control Commands #
    ####################
    # Left Click = Yawn
    # Right Click = Looking up
    # Increment Control Modes = Right Wink
    # Left Eye Wink and Smile are left unassigned
    # You can dictate text in Sound mode (Control mode = 3)

    #####################################################################
    # Initializing the Speech Recognition Thread
    #####################################################################

    # You can add more controls as you deem fit.
    numbers = ['zero', 'one', 'two', 'three', 'four', \
                'five', 'six', 'seven', 'eight', 'nine']

    controls = ['left', 'right', 'up', 'down']

    control_syn = {}
    for control in controls:
        control_syn.setdefault(control, [])

    # Need to account for similar sounding words as speech recog is on the edge!
    control_syn['left'].extend(['let', 'left', 'light', 'live', 'laugh'])
    control_syn['right'].extend(
        ['right', 'write', 'great', 'fight', 'might', 'ride'])
    control_syn['up'].extend(['up', 'hop', 'hope', 'out'])
    control_syn['down'].extend(['down', 'doubt', 'though'])

    device_list = load_device()

    stream_reader = audio_helper.StreamReader(device_list[1][0],
                                              received_frames)

    if not stream_reader.initialize():
        print("Failed to initialize Stream Reader")
        speech.close()
        speech = None
        return

    speech = SpeechManager()
    print('speech config = ' + str(SPEECH_CONFIG))
    if not speech.initialize(SPEECH_CONFIG, infer_device='CPU', batch_size=8):
        print("Failed to initialize ASR recognizer")
        speech.close()
        speech = None
        return

    stt = Queue()
    prevUtterance = ''

    reading_thread = Thread(target=stream_reader.read_stream, \
                        args=(speech, stt), daemon=True)
    reading_thread.start()

    #####################################################################

    # Fixing 60x60 box as yaw and pitch boundaries to
    # correspond to head turning left and right (yaw)
    # and also moving up and down (pitch)
    headYawPitchBounds = [-30, 30]

    lastGaze = [0, 0]
    lastPose = [0, 0]

    # Set the stickiness value
    stickinessHead = 5
    stickinessGaze = 10

    eventText = "No Event"

    # init the logger
    logger = logging.getLogger()

    feeder = None
    feeder = InputFeeder(args.input_type, args.input)
    feeder.load_data()

    mc = MouseController("medium", "fast")

    # Loading all the gesture control models viz. face, head and gaze
    face_model = FaceDetector(args.face, args.device, args.cpu_extension)
    # face_model.check_model()
    face_model.load_model()
    logger.info("Face Detection Model Loaded...")

    head_model = HeadPoseDetect(args.headpose, args.device, args.cpu_extension)
    # head_model.check_model()
    head_model.load_model()
    logger.info("Head Pose Detection Model Loaded...")

    landmarks_model = LandmarksDetect(args.landmarks, args.device,
                                      args.cpu_extension)
    # landmarks_model.check_model()
    landmarks_model.load_model()
    logger.info("Landmarks Detection Model Loaded...")

    gaze_model = GazeDetect(args.gazeestimation, args.device,
                            args.cpu_extension)
    # gaze_model.check_model()
    gaze_model.load_model()
    logger.info("Gaze Detection Model Loaded...")

    visualizeHeadPose = bool(distutils.util.strtobool(args.visualizeHeadPose))
    visualizeGaze = bool(distutils.util.strtobool(args.visualizeGaze))
    visualizeFace = bool(distutils.util.strtobool(args.visualizeFace))

    pixelCount_leye = []
    isEyeOpen_leye = []
    pixelCount_reye = []
    isEyeOpen_reye = []

    isCalibrated = False
    isSmiling = False
    isMouthOpen = False
    moveEnabled = False
    islookingUp = False

    lastPoses = collections.deque(maxlen=20)
    lastGazes = collections.deque(maxlen=20)

    try:
        frame_count = 0

        for ret, frame in feeder.next_batch():

            ################################################################
            # if any sound is deciphered from the spunned off thread then
            # check the last 3 words of the utterance for matching control word
            if (stt.qsize() > 0 and controlMode == 3):

                utterance = stt.get()
                print("From Parent: " + utterance)

                # need to process again only if change in utterance
                if (prevUtterance != utterance):
                    control, lastWord = detectSoundEvent(
                        utterance, controls, control_syn)

                    if control is not None:

                        direction = controls.index(control)
                        mc.moveRelative(direction)

                    else:

                        if lastWord in numbers:
                            lastWord = str(numbers.index(lastWord))

                        mc.write(lastWord)

                    prevUtterance = utterance

            ################################################################

            k = cv2.waitKey(1) & 0xFF
            # press 'q' to exit
            if k == ord('q'):
                break

            if not ret:
                break

            frame_count += 1
            crop_face = None

            # inferenceBegin = time.time()
            crop_face, box = face_model.predict(frame.copy())

            if crop_face is None:
                logger.error("Unable to detect the face.")
                continue

            # Draw the face box
            xmin, ymin, xmax, ymax = box
            if visualizeFace:
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 255),
                              3)

            orientation = head_model.predict(crop_face)

            box_left, box_right, \
            left_eye, right_eye, \
            p0, p1, p12, p13, p14, \
            p2, p3, p15, p16, p17, \
            p8, p9, p10, p11 = landmarks_model.predict(crop_face)

            # if any of the eye is not detected eye gesture and
            # gaze estimation are not executed
            if (left_eye.size * right_eye.size == 0):
                logger.error("Unable to detect eyes.")
                continue

            pad = 10
            # Compute Right Eye: Close Snap
            right_eye_ball = frame[ymin + p1[1] - pad:ymin + p0[1] + pad,
                                   xmin + p1[0] - pad:xmin + p0[0] + pad]

            # Compute Left Eye: Close Snap
            left_eye_ball = frame[ymin + p3[1] - pad:ymin + p2[1] + pad,
                                  xmin + p2[0] - pad:xmin + p3[0] + pad]

            # pixelCount_leye_bk = pixelCount_leye #can delete this line
            pixelCount_reye, Rtrigger, probR = findClosurebyStats(
                'Right', right_eye_ball, pixelCount_reye, frame_count)
            pixelCount_leye, Ltrigger, probL = findClosurebyStats(
                'Left', left_eye_ball, pixelCount_leye, frame_count)

            print("probL: " + str(probL))
            if probL < -30 and islookingUp is False:
                print('Click Right')
                controlMode = hikeControlMode(controlMode)  ## to change
                # mc.clickRight()
                islookingUp = True
                eventText = 'Increment Control Mode'
            elif probL > 0:
                islookingUp = False
                if (eventText == 'Increment Control Mode'):
                    eventText = 'No Event'

            # If both eyes are detected as pressed (as one eye
            # can shrink when the other eye is winked) then check
            # which eye has higher probability of closure.
            # Note: To close both eyes is not a gesture.
            if Ltrigger and Rtrigger:
                # print("probR = " + str(probR) + "probL = " + str(probL))
                if probR > probL:
                    Ltrigger = False
                else:
                    Rtrigger = False

            # If you want to enable left and right wink actions,
            # then call corresponding functions here.
            if Ltrigger:
                print('left eye pressed')
                # controlMode = dipControlMode(controlMode)
                # writeList(pixelCount_leye_bk) # Dumping list for debugging purpose
                # mc.scroll(20) # you can pass the head pose up/down as param
                # mc.drag()

            if Rtrigger:
                print('right eye pressed')
                # controlMode = hikeControlMode(controlMode)
                # mc.clickRight()

            gaze, (x, y) = gaze_model.predict(left_eye, right_eye, orientation)

            # inferenceEnd = time.time()
            # inferenceTime = inferenceEnd - inferenceBegin
            # print("Inference Time of 4 models = " + str(inferenceTime))

            yaw = orientation[0]
            pitch = orientation[1]
            roll = orientation[2]

            sinY = math.sin(yaw * math.pi / 180.0)
            sinP = math.sin(pitch * math.pi / 180.0)
            sinR = math.sin(roll * math.pi / 180.0)

            cosY = math.cos(yaw * math.pi / 180.0)
            cosP = math.cos(pitch * math.pi / 180.0)
            cosR = math.cos(roll * math.pi / 180.0)

            cH, cW = crop_face.shape[:2]
            arrowLength = 0.5 * max(cH, cW)

            # Drawing Eye Boxes
            (p0_x, p0_y) = box_left[:2]
            (p12_x, p12_y) = box_left[2:4]
            cv2.rectangle(frame, (p0_x + xmin, p0_y + ymin),
                          (p12_x + xmin, p12_y + ymin - 5), (255, 0, 0), 3)

            (p2_x, p2_y) = box_right[:2]
            (p17_x, p17_y) = box_right[2:4]
            cv2.rectangle(frame, (p2_x + xmin, p2_y + ymin),
                          (p17_x + xmin, p17_y + ymin - 5), (255, 0, 0), 3)

            # to draw the eye points as circles
            cv2.circle(frame, tuple(map(operator.add, p0, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p1, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p12, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p13, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p14, (xmin, ymin))), 1,
                       (255, 0, 0), 2)

            # to draw the eye points as circles
            cv2.circle(frame, tuple(map(operator.add, p2, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p3, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p15, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p16, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p17, (xmin, ymin))), 1,
                       (255, 0, 0), 2)

            # to draw mouth points
            cv2.circle(frame, tuple(map(operator.add, p8, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p9, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p10, (xmin, ymin))), 1,
                       (255, 0, 0), 2)
            cv2.circle(frame, tuple(map(operator.add, p11, (xmin, ymin))), 1,
                       (255, 0, 0), 2)

            # Finding Eye Center
            xCenter_left = int((p0_x + p12_x) / 2) + xmin
            yCenter_left = int((p0_y + p12_y) / 2) + ymin
            leftEye_Center = (xCenter_left, yCenter_left)

            # Finding Eye Center
            xCenter_right = int((p2_x + p17_x) / 2) + xmin
            yCenter_right = int((p2_y + p17_y) / 2) + ymin
            rightEye_Center = (xCenter_right, yCenter_right)

            ############# DRAWING DIRECTION ARROWS BASED ON HEAD POSITION ############
            ## Euler angles to cartesian coordinates#
            # https://stackoverflow.com/questions/1568568/how-to-convert-euler-angles-to-directional-vector

            # Total rotation matrix is: (See correct matrix in blog)

            # | cos(yaw)cos(pitch) -cos(yaw)sin(pitch)sin(roll)-sin(yaw)cos(roll) -cos(yaw)sin(pitch)cos(roll)+sin(yaw)sin(roll)|
            # | sin(yaw)cos(pitch) -sin(yaw)sin(pitch)sin(roll)+cos(yaw)cos(roll) -sin(yaw)sin(pitch)cos(roll)-cos(yaw)sin(roll)|
            # | sin(pitch)          cos(pitch)sin(roll)                            cos(pitch)sin(roll)|

            if visualizeHeadPose or controlMode == 2 or isCalibrated is False:

                # yaw and pitch are important for mouse control
                poseArrowX = orientation[0]  #* arrowLength
                poseArrowY = orientation[1]  #* arrowLength

                # Taking 2nd and 3rd row for 2D Projection
                ##############################LEFT EYE ###################################
                # cv2.arrowedLine(frame, leftEye_Center,
                #          (int((xCenter_left + arrowLength * (cosR * cosY + sinY * sinP * sinR))),
                #           int((yCenter_left + arrowLength * cosP * sinR))), (255, 0, 0), 4)

                # # center to top
                # cv2.arrowedLine(frame, leftEye_Center,
                #          (int(((xCenter_left + arrowLength * (sinY * sinP * cosR - cosY * sinR)))),
                #           int((yCenter_left + arrowLength * cosP * cosR))), (0, 0, 255), 4)

                # center to forward
                # cv2.arrowedLine(frame, leftEye_Center, \
                #          (int(((xCenter_left + arrowLength * sinY * cosP))), \
                #           int((yCenter_left - arrowLength * sinP))), (0, 255, 0), 4)

                ##############################RIGHT EYE ###################################
                # cv2.arrowedLine(frame, rightEye_Center,
                #          (int((xCenter_right + arrowLength * (cosR * cosY + sinY * sinP * sinR))),
                #           int((yCenter_right + arrowLength * cosP * sinR))), (255, 0, 0), 4)

                # # center to top
                # cv2.arrowedLine(frame, rightEye_Center,
                #          (int(((xCenter_right + arrowLength * (sinY * sinP * cosR - cosY * sinR)))),
                #           int((yCenter_right + arrowLength * cosP * cosR))), (0, 0, 255), 4)

                # center to forward
                # cv2.arrowedLine(frame, rightEye_Center,
                #          (int(((xCenter_right + arrowLength * sinY * cosP))),
                #           int((yCenter_right - arrowLength * sinP))), (0, 255, 0), 4)

            # gaze is required for calibration
            if visualizeGaze or controlMode == 1 or isCalibrated is False:

                gazeArrowX = gaze[0] * arrowLength
                gazeArrowY = -gaze[1] * arrowLength

                cv2.arrowedLine(frame, leftEye_Center,
                                (int(leftEye_Center[0] + gazeArrowX),
                                 int(leftEye_Center[1] + gazeArrowY)),
                                (0, 255, 0), 4)
                cv2.arrowedLine(frame, rightEye_Center,
                                (int(rightEye_Center[0] + gazeArrowX),
                                 int(rightEye_Center[1] + gazeArrowY)),
                                (0, 255, 0), 4)

            ###############################
            # Compute Mouth Aspect Ratio  #
            ###############################
            mouthWidth = p9[0] - p8[0]
            mouthHeight = p11[1] - p10[1]

            if (mouthWidth != 0):
                mAspRatio = mouthHeight / mouthWidth
            else:
                mAspRatio = 0
            # print('MAR RATIO = ' + str(mAspRatio))

            # To validate face is properly facing the camera.
            # To avoid erroneous control mode switches coz of face turns.
            if (isFaceInBounds(yaw, pitch) and mAspRatio > 0):

                # These threshold constants need to either modified or made dynamic.
                #
                # when mouth is open
                if mAspRatio > 0.4 and isMouthOpen is False:

                    # mouthHeights.clear()
                    # isSoundControl = False
                    print('clicking left')
                    mc.clickLeft()
                    isMouthOpen = True
                    eventText = 'Click Left'

                elif mAspRatio < 0.35:
                    isMouthOpen = False
                    if (eventText == 'Click Left'):
                        eventText = 'No Event'

                # when mouth is wide, i.e. smiling
                if mAspRatio < 0.26 and isSmiling == False:

                    print('You are smiling...')
                    eventText = 'Smiling'
                    isSmiling = True

                elif mAspRatio > 0.3:
                    # Reset the click flag once smile is over.
                    isSmiling = False
                    if (eventText == 'Smiling'):
                        eventText = 'No Event'

            # controlMode = 3 # To debug a specific control mode.

            try:
                if frame_count % 5 == 0:

                    if (mc.calibrated is False):

                        isCalibrated = mc.captureCorners(
                            gazeArrowX, gazeArrowY)

                    else:
                        # Face should be forward facing inorder to take comamnds.
                        # if (isFaceInBounds(headYawPitchBounds, yaw, pitch)):

                        if controlMode == 1:

                            moveEnabled, lastGazes =  \
                                isMoveEnabled(lastGaze, stickinessGaze, gazeArrowX, gazeArrowY, lastGazes)

                            if moveEnabled:
                                print('moving mouse with gaze')
                                mc.moveWithGaze(gazeArrowX, gazeArrowY)
                                lastGaze = [gazeArrowX, gazeArrowY
                                            ]  #saving pos for stickiness
                        elif controlMode == 2:

                            moveEnabled, lastPoses =  \
                                isMoveEnabled(lastPose, stickinessHead, poseArrowX, poseArrowY, lastPoses)

                            if moveEnabled:
                                print('moving mouse with head. Yaw: ' +
                                      str(poseArrowX) + " Pitch: " +
                                      str(poseArrowY) + " Roll: " +
                                      str(orientation[2]))
                                mc.moveWithHead(poseArrowX, poseArrowY,
                                                headYawPitchBounds)
                                lastPose = [poseArrowX, poseArrowY
                                            ]  #saving pos for stickiness

            except Exception as err:
                print(traceback.format_exc())
                PrintException()
                logger.error("Exception occurred while moving cursor!")

            # Display calibration status on video
            if isCalibrated:
                frame = cv2.putText(frame, 'Calibration is done.', (20, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255),
                                    1, cv2.LINE_AA)

            frame = cv2.putText(frame, 'Control Mode: ' + modes[controlMode],
                                (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (0, 0, 255), 1, cv2.LINE_AA)

            frame = cv2.putText(frame, 'Event: ' + eventText, (20, 70),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1,
                                cv2.LINE_AA)

            frame = cv2.putText(frame, 'MAR: ' + str(round(mAspRatio, 2)),
                                (20, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (0, 255, 0), 1, cv2.LINE_AA)

            frame = cv2.putText(frame, 'Mouse Loc: ' + str(mc.getLocation()),
                                (20, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (0, 255, 0), 1, cv2.LINE_AA)

            imshow('frame', frame, width=800)
            # frameEnd = time.time()
            # frameTime = frameEnd - frameBegin
            # print("FPS = " + str(1/frameTime))

    except Exception as err:
        print(traceback.format_exc())
        PrintException()
        logger.error(err)

    cv2.destroyAllWindows()
    feeder.close()
def main():
    args = build_argparser().parse_args()
    single_image_mode = (args.input_type == 'image') 

    #Create and validate input feed
    input_feed = InputFeeder(input_type=args.input_type,input_file=args.input_path)
    input_feed.load_data()

    if not input_feed.is_open():
        log.critical('Error opening input, check --input_path parameter (use --help for more info)')
        sys.exit(1)

    #Load models
    face_model = FaceDetection(args.face_detection_model)
    face_model.load_model(args.device)
    head_pose_model = HeadPoseEstimation(args.head_pose_model)
    head_pose_model.load_model(args.device)
    facial_landmarks_model = FacialLandmarksDetection(args.facial_landmarks_model)
    facial_landmarks_model.load_model(args.device)
    gaze_estimation_model = GazeEstimation(args.gaze_estimation_model)
    gaze_estimation_model.load_model(args.device)

    #initialize frame count for filtering
    count = 0
    gaze_vector_accum = np.array([0,0,0],dtype='float64')
    gaze_vector_filtered = np.array([0,0,0],dtype='float64')
    
    #get screen calibration
    if not args.calibrate:
        run_calibration = False
        cal_x_limits, cal_y_limits = utils.get_calibration()
    else:
        run_calibration = True
        update_display = True
        
        #squares to draw on screen for calibration
        top_left_square = {'pt1':(0,0), 'pt2':(BOX_SIDE_LENGTH,BOX_SIDE_LENGTH)}
        top_right_square = {'pt1':(SCREEN_WIDTH - BOX_SIDE_LENGTH,0), 'pt2':(SCREEN_WIDTH, BOX_SIDE_LENGTH)}
        bottom_left_square = {'pt1':(0,SCREEN_HEIGHT - BOX_SIDE_LENGTH), 'pt2':(BOX_SIDE_LENGTH,SCREEN_HEIGHT)}
        bottom_right_square =   {'pt1':(SCREEN_WIDTH - BOX_SIDE_LENGTH,SCREEN_HEIGHT - BOX_SIDE_LENGTH), 
                                'pt2':(SCREEN_WIDTH,SCREEN_HEIGHT)}
        cal_squares = [top_left_square,top_right_square,bottom_left_square, bottom_right_square]
        
        #names of the calibration points for storing on calibration file
        cal_names = ['top_left', 'top_right', 'bottom_left', 'bottom_right']
        
        #model output values for each calibration point will be stored here
        cal_points = {}

        square_iter = iter(cal_squares)
        name_iter = iter(cal_names)
        
        #image to display on screen for calibration
        base_img = get_base_img("LOOK AT THE SQUARES FOR 2 SECONDS","AND THEN PRESS n", COLORS[0])
        

    if not single_image_mode:
        while True:

            #filter results
            count += 1
            if(count>FILTER_QUANTITY):
                gaze_vector_filtered=gaze_vector_accum/FILTER_QUANTITY
                gaze_vector_accum=np.array([0,0,0],dtype='float64')
                count=0

            #process frames
            frame = next(input_feed.next_batch())
            
            start_time=time.time()
            face_boxes = run_inference(frame, face_model)
            cropped_faces = utils.crop_image(frame,face_boxes)
            
            if cropped_faces==0: #no face detected, nothing to process
                continue

            elif cropped_faces is None: #finished reading input feed
                break

            elif len(cropped_faces)==1: #found a single face in the frame, proceed
                
                head_pose = run_inference(cropped_faces[0], head_pose_model)
                eye_boxes = run_inference(cropped_faces[0], facial_landmarks_model)
                cropped_eyes = utils.crop_image(cropped_faces[0], eye_boxes)
                gaze_vector = run_inference_gaze(cropped_eyes[0], cropped_eyes[1], head_pose, gaze_estimation_model)
                
                inference_time=time.time()-start_time
                
                gaze_vector_accum += gaze_vector
                
                if run_calibration:
                    
                    if update_display:
                        img = np.copy(base_img)
                        square = next(square_iter, None)
                        if not square is None: 
                            cv2.rectangle(img,square['pt1'], square['pt2'],COLORS[0],-1)
                            update_display=False
                        else: #Done with calibration
                            cal_x_limits, cal_y_limits = utils.get_calibration(cal_points)
                            utils.save_calibration(cal_points)
                            run_calibration=False

                    utils.imshow_fullscreen('window',img)

                    if cv2.waitKey(1) & 0xFF == ord('n'):
                        update_display = True
                        point = np.array([ gaze_vector_filtered[0], gaze_vector_filtered[1] ])
                        point_name = next(name_iter)
                        cal_points[point_name] = point
                    
                else:
                    
                    if not args.display_all:
                        img = get_base_img("GAZE CONTROL ENABLED", "MOVE MOUSE TO ANY CORNER OR PRESS q TO EXIT", COLORS[1])
                        utils.imshow_fullscreen('window',img)
                    else:
                        utils.display_inference_results(frame, face_boxes, head_pose, gaze_vector, inference_time)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        print("User terminated program, goodbye")
                        break
                    

                    screen_x, screen_y = get_screen_position(gaze_vector_filtered[0], gaze_vector_filtered[1], cal_x_limits, cal_y_limits)
                
                    try:
                        pyautogui.moveTo(screen_x,screen_y,MOUSE_MOVE_TIME)
                    except pyautogui.FailSafeException:
                        print("User terminated program, goodbye")
                        break

            else:
                #Handle multiple people here if needed
                log.critical("ERROR: Multiple people detected, only single person supported")
                sys.exit(1)
            
    else:
        #Implement single image mode here if needed
        log.critical("ERROR: Single image mode not implemented")
        sys.exit(1)

    input_feed.close()
    cv2.destroyAllWindows()
Exemplo n.º 16
0
class Inferencer:
    def __init__(self,
                 device='CPU',
                 mouse_con=False,
                 face_dec=None,
                 fac_land=None,
                 head_pose=None,
                 gaze=None,
                 show_video=False,
                 save_video=False):
        '''
        all models should be put in here 
        '''
        if face_dec and fac_land and head_pose and gaze:
            self.face_dec, self.fac_land, self.head_pose, self.gaze = FaceDetectionModel(
                face_dec, device=device), FacialLandmarksDetection(
                    fac_land, device=device), Head_Pose_Estimation(
                        head_pose,
                        device=device), Gaze_Estimation(gaze, device=device)
            self.face_dec.load_model()
            self.fac_land.load_model()
            self.head_pose.load_model()
            self.gaze.load_model()
        else:
            raise ValueError('Missing Arguments')

        if mouse_con:
            self.mouse_con = MouseController("low", "fast")

        self.show_video, self.save_video = show_video, save_video

    def __call__(
        self,
        input_type=None,
        input_file=None,
    ):
        self.run(input_type=input_type, input_file=input_file)

    def run(
        self,
        input_type=None,
        input_file=None,
    ):
        if input_type and input_file:
            self.input_ = InputFeeder(input_type, input_file)
            self.input_.load_data()
            if self.save_video:
                out = cv2.VideoWriter(
                    'output.mp4', 0x00000021, 30,
                    (int(self.input_.cap.get(3)), int(self.input_.cap.get(4))))
        try:
            fc_dec_inf_time = 0
            landmark_inf_time = 0
            pose_inf_time = 0
            gaze_inf_time = 0
            frame_counter = 0
            while True:
                # Read the next frame
                try:
                    frame = next(self.input_.next_batch())
                    frame_counter += 1
                except StopIteration:
                    break

                key_pressed = cv2.waitKey(60)

                # face detection
                start = time.time()
                out_frame, boxes = self.face_dec.predict(frame,
                                                         display_output=True)
                fc_dec_inf_time += (time.time() - start)

                #for each box
                for box in boxes:
                    face = out_frame[box[1]:box[3], box[0]:box[2]]

                    start = time.time()
                    out_frame, left_eye_point, right_eye_point = self.fac_land.predict(
                        out_frame, face, box, display_output=True)
                    landmark_inf_time += (time.time() - start)

                    start = time.time()
                    out_frame, headpose_angels = self.head_pose.predict(
                        out_frame, face, box, display_output=True)
                    pose_inf_time += (time.time() - start)

                    start = time.time()
                    out_frame, gazevector = self.gaze.predict(
                        out_frame,
                        face,
                        box,
                        left_eye_point,
                        right_eye_point,
                        headpose_angels,
                        display_output=True)
                    gaze_inf_time += (time.time() - start)

                    if self.show_video:
                        cv2.imshow('im', out_frame)

                    if self.save_video:
                        out.write(out_frame)

                    if self.mouse_con:
                        self.mouse_con.move(gazevector[0], gazevector[1])

                    time.sleep(1)

                    #consider only first detected face in the frame
                    break

                # Break if escape key pressed
                if key_pressed == 27:
                    break

            if self.save_video:
                out.release()
            self.input_.close()
            cv2.destroyAllWindows()
            print(
                'average inference time for face detection model is :- {:2f}ms'
                .format((fc_dec_inf_time / frame_counter) * 1000))
            print(
                'average inference time for facial landmark model is :- {:2f}ms'
                .format((landmark_inf_time / frame_counter) * 1000))
            print(
                'average inference time for head pose estimation model is :- {:2f}ms'
                .format((pose_inf_time / frame_counter) * 1000))
            print(
                'average inference time for gaze estimation model is :- {:2f}ms'
                .format((gaze_inf_time / frame_counter) * 1000))
        except Exception as ex:
            logging.exception("Error in inference: " + str(ex))
Exemplo n.º 17
0
def main():

    # Grab command line args
    args = read_argument().parse_args()

    logger_obj = log.getLogger()

    if args.input == 'CAM':
        input_feeder = InputFeeder('cam')
    elif args.input.endswith('jpg') or args.input.endswith('bmp'):
        input_feeder = InputFeeder('image', args.input)
    elif args.input.endswith('mp4'):
        input_feeder = InputFeeder('video', args.input)
    else:
        logger_obj.error(
            "Unsupported input, valid inputs are image(jpg and bmp), video file(mp4) or webcam/video stream."
        )

    # Initialize inference models
    face_detection_model = face_detection(args.face_detection_model,
                                          args.device, args.prob_threshold,
                                          args.cpu_extension)
    facial_landmarks_detection_model = facial_landmarks_detection(
        args.facial_landmarks_detection, args.device, args.cpu_extension)
    head_pose_estimation_model = head_pose_estimation(
        args.head_pose_estimation, args.device, args.cpu_extension)
    gaze_estimation_model = gaze_estimation(args.gaze_estimation, args.device,
                                            args.cpu_extension)

    mouse_controller = MouseController('medium', 'fast')

    # Load inference models

    start_time = time.time()
    face_detection_model.load_model()
    face_detection_model_load_time = time.time()
    logger_obj.error("Face detection load time in seconds: {:.2f} ms".format(
        (time.time() - start_time) * 1000))

    facial_landmarks_detection_model.load_model()
    facial_landmarks_detection_load_time = time.time()
    logger_obj.error(
        "Facial Landmark detection load time in seconds: {:.2f} ms".format(
            (time.time() - start_time) * 1000))

    head_pose_estimation_model.load_model()
    head_pose_estimation_load_time = time.time()
    logger_obj.error(
        "Head pose detection load time in seconds: {:.2f} ms".format(
            (time.time() - start_time) * 1000))

    gaze_estimation_model.load_model()
    gaze_estimation_load_time = time.time()
    logger_obj.error("Gaze estimation load time in seconds: {:.2f} ms".format(
        (time.time() - start_time) * 1000))

    # Load input feeder.
    input_feeder.load_data()

    total_model_load_time = time.time() - start_time

    counter = 0
    inference_start_time = time.time()

    # run inference
    for flag, frame in input_feeder.next_batch():

        if not flag:
            break

        pressed_key = cv2.waitKey(60)
        counter = counter + 1

        face_detection_output, coords = face_detection_model.predict(frame)

        head_pose_estimation_output = head_pose_estimation_model.predict(
            face_detection_output)

        left_eye_image, right_eye_image, eye_coord = facial_landmarks_detection_model.predict(
            face_detection_output)

        mouse_controller_coordinate, gaze_estimation_vector = gaze_estimation_model.predict(
            left_eye_image, right_eye_image, head_pose_estimation_output)

        preview_flag = args.previewFlags

        if len(preview_flag) != 0:

            preview_window = frame.copy()

            if 'face_detect' in preview_flag:

                cv2.rectangle(preview_window, (coords[0], coords[1]),
                              (coords[2], coords[3]), (0, 0, 255), 3)

                #logger_obj.error('inside face_detect')

            if 'face_landmark_detect' in preview_flag:

                if 'face_detect' in preview_flag:
                    preview_window = face_detection_output

                cv2.rectangle(preview_window,
                              (eye_coord[0][0], eye_coord[0][1]),
                              (eye_coord[0][2], eye_coord[0][3]),
                              (255, 0, 255))
                cv2.rectangle(preview_window,
                              (eye_coord[1][0], eye_coord[1][1]),
                              (eye_coord[1][2], eye_coord[1][3]),
                              (255, 0, 255))

                #logger_obj.error('inside facial landmark')

            if 'head_pose' in preview_flag:

                cv2.putText(
                    preview_window,
                    "yaw:{:.1f} | pitch:{:.1f} | roll:{:.1f}".format(
                        head_pose_estimation_output[0],
                        head_pose_estimation_output[1],
                        head_pose_estimation_output[2]), (20, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.35, (0, 0, 0), 1)

                #logger_obj.error('inside head pose')

            if 'gaze_est' in preview_flag:

                yaw = head_pose_estimation_output[0]
                pitch = head_pose_estimation_output[1]
                roll = head_pose_estimation_output[2]

                focal_length = 950
                scale = 50

                center_of_face = (face_detection_output.shape[1] / 2,
                                  face_detection_output.shape[0] / 2, 0)

                if 'face_detect' in preview_flag or 'face_landmark_detect' in preview_flag:
                    draw_axes(preview_window, center_of_face, yaw, pitch, roll,
                              scale, focal_length)
                    #logger_obj.error('inside gaze 1')
                else:
                    draw_axes(frame, center_of_face, yaw, pitch, roll, scale,
                              focal_length)
                    #logger_obj.error('inside gaze 2')

        if len(preview_flag) != 0:
            #image = np.hstack((cv2.resize(frame, (500, 500)), cv2.resize(preview_window, (500, 500))))
            image = cv2.resize(preview_window, (500, 500))
            #logger_obj.error('hstack images side by side')
        else:
            image = cv2.resize(frame, (500, 500))

        cv2.imshow('Visualization', image)

        mouse_controller.move(mouse_controller_coordinate[0],
                              mouse_controller_coordinate[1])

        if pressed_key == 27:
            logger_obj.error("exit key is pressed..")
            break

    inference_time = round(time.time() - inference_start_time, 2)
    fps = int(counter) / inference_time

    logger_obj.error("counter {} seconds".format(counter))
    logger_obj.error("Total model load time in seconds: {:.2f} s".format(
        total_model_load_time))
    logger_obj.error(
        "Total inference time in seconds: {:.2f} s".format(inference_time))
    logger_obj.error("fps {}".format(fps))

    input_feeder.close()
    cv2.destroyAllWindows()
Exemplo n.º 18
0
def run_controller(args):
    #     print(args.save)
    feeder = None

    if args.input == "cam":
        feeder = InputFeeder("cam")

    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        if not os.path.isfile(args.input):
            log.error("Unable to find specified video file")
            exit(1)
        feeder = InputFeeder("image", args.input, args.save)

    else:
        if not os.path.isfile(args.input):
            log.error("Unable to find specified video file")
            exit(1)
        feeder = InputFeeder("video", args.input, args.save)

    feeder.load_data()

    mc = MouseController('medium', 'fast')

    model_face = Face_Detector()
    model_face.load_model(args.model_fd, args.device, args.extension)

    model_pose = Pose_Estimator()
    model_pose.load_model(args.model_pe, args.device, args.extension)

    model_landmark = Facial_Landmarks()
    model_landmark.load_model(args.model_fl, args.device, args.extension)

    model_gaze = Gaze_Estimator()
    model_gaze.load_model(args.model_ge, args.device, args.extension)

    frame_count = 0
    for b, frame in feeder.next_batch():
        frame_count += 1
        preview = np.copy(frame)
        crop_face, face_count, points = model_face.predict(
            preview, args.thres_fd)

        key_pressed = cv2.waitKey(30)
        if (face_count == 0):
            if (b or key_pressed == 27):
                break

            log.error('no face is detected')
            feeder.save_file(preview)
            continue

        angles = model_pose.predict(preview, crop_face)
        left_eye, right_eye, eye_points = model_landmark.predict(
            preview, crop_face, points)

        mx, my = model_gaze.predict(preview, left_eye, right_eye, angles,
                                    eye_points)
        feeder.save_file(preview)

        if key_pressed == 27:
            break

        if frame_count % 5 == 0:
            if args.draw_lines:
                cv2.imshow('video', cv2.resize(preview, (500, 500)))
            else:
                cv2.imshow('video', cv2.resize(frame, (500, 500)))
            mc.move(mx, my)

    feeder.close()
    cv2.destroyAllWindows()
Exemplo n.º 19
0
def main():

    args = build_argparser().parse_args()
    previewFlags = args.previewFlags
    
    logger = logging.getLogger()
    inputFile = args.input
    inputFeeder = None

    if inputFile.lower()=="cam":
        inputFeeder=InputFeeder("cam")
    if not os.path.isfile(inputFile):
        logger.error("Unable to find input file")
        exit(1)
    inputFeeder=InputFeeder("video",inputFile)


    mfd=Model_Face_Detection(args.facedetectionmodel,args.device,args.cpu_extension)
    mfld=Model_Facial_Landmarks_Detection(args.faciallandmarkmodel,args.device,args.cpu_extension)
    mge=Model_Gaze_Estimation(args.gazeestimationmodel,args.device,args.cpu_extension)
    mhpe=Model_Head_Pose_Estimation(args.headposemodel,args.device,args.cpu_extension)

    mc = MouseController('medium','fast')
    #inputFeeder=InputFeeder("cam")
    inputFeeder.load_data()

    mfd.load_model()
    mfld.load_model()
    mge.load_model()
    mhpe.load_model()
    frame_count = 0
    for ret, frame in inputFeeder.next_batch():


        if frame is not None:

            frame_count+=1
            if frame_count%5==0:
                cv2.imshow('video',cv2.resize(frame,(500,500)))
        
            key = cv2.waitKey(60)
            croppedFace, face_coords = mfd.predict(frame.copy(), args.prob_threshold)
            if type(croppedFace)==int:
                logger.error("No face detected.")
                if key==27:
                    break
                continue
            
            hp_out = mhpe.predict(croppedFace.copy())
            
            left_eye, right_eye, eye_coords = mfld.predict(croppedFace.copy())
            #print(left_eye
            
            new_mouse_coord, gaze_vector = mge.predict(left_eye, right_eye, hp_out)
            
            if (not len(previewFlags)==0):
                preview_frame = frame.copy()
                if 'fd' in previewFlags:
                
                    preview_frame = croppedFace
                if 'fld' in previewFlags:
                    cv2.rectangle(croppedFace, (eye_coords[0][0]-10, eye_coords[0][1]-10), (eye_coords[0][2]+10, eye_coords[0][3]+10), (0,255,0), 3)
                    cv2.rectangle(croppedFace, (eye_coords[1][0]-10, eye_coords[1][1]-10), (eye_coords[1][2]+10, eye_coords[1][3]+10), (0,255,0), 3)
                    
                    
                if 'hp' in previewFlags:
                    cv2.putText(preview_frame, "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".format(hp_out[0],hp_out[1],hp_out[2]), (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
                if 'ge' in previewFlags:
                    x, y, w = int(gaze_vector[0]*12), int(gaze_vector[1]*12), 160
                    le =cv2.line(left_eye.copy(), (x-w, y-w), (x+w, y+w), (255,0,255), 2)
                    cv2.line(le, (x-w, y+w), (x+w, y-w), (255,0,255), 2)
                    re = cv2.line(right_eye.copy(), (x-w, y-w), (x+w, y+w), (255,0,255), 2)
                    cv2.line(re, (x-w, y+w), (x+w, y-w), (255,0,255), 2)
                    croppedFace[eye_coords[0][1]:eye_coords[0][3],eye_coords[0][0]:eye_coords[0][2]] = le
                    croppedFace[eye_coords[1][1]:eye_coords[1][3],eye_coords[1][0]:eye_coords[1][2]] = re
                    
                    
                cv2.imshow("visualization",cv2.resize(preview_frame,(500,500)))
            
            if frame_count%5==0:
                mc.move(new_mouse_coord[0],new_mouse_coord[1])    
            if key==27:
                    break
    logger.error("video ended...")
    cv2.destroyAllWindows()
    inputFeeder.close()
Exemplo n.º 20
0
def main():
    args = get_args()

    log.basicConfig(filename='example.log', level=log.DEBUG)

    inputFile = args.input
    #inputFile = "./bin/demo.mp4"

    mouse = MouseController("high", "fast")

    frame_count = 0
    focal_length = 950.0
    scale = 50

    #print(f"Visual flag: {args.visual_flag}")

    if inputFile.lower() == "cam":
        feed = InputFeeder('cam')
        log.info("Video source: " + str(inputFile))

    else:
        if not os.path.isfile(inputFile):
            log.error("Unable to find file: " + inputFile)
            exit(1)
        feed = InputFeeder("video", inputFile)
        log.info("Video source: " + str(inputFile))
        log.info("InputFeeder initialized")

    log.info("Device: " + str(args.device))
    log.info("Face detection model: " + str(args.facedetectionmodel))
    log.info("Facial landmarks model: " + str(args.faciallandmarksmodel))
    log.info("Head pose estimation model: " + str(args.headposemodel))
    log.info("Gaze estimation model: " + str(args.gazeestimationmodel))

    if args.stats == 1:
        print("Running statistics...")
        inference_times = []
        fdm_inference_times = []
        hpm_inference_times = []
        flm_inference_times = []
        gem_inference_times = []
        start_time = time.time()

    # Create instances of the different models
    fdm = FaceDetector(args.facedetectionmodel, args.device,
                       args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        fdm.load_model()
        fdm_load_time = time.time() - start_time
    else:
        fdm.load_model()
    fdm.check_model()

    hpm = HeadPoseEstimator(args.headposemodel, args.device,
                            args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        hpm.load_model()
        hpm_load_time = time.time() - start_time
    else:
        hpm.load_model()
    hpm.check_model()

    flm = FacialLandmarksDetector(args.faciallandmarksmodel, args.device,
                                  args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        flm.load_model()
        flm_load_time = time.time() - start_time
    else:
        flm.load_model()
    flm.check_model()

    gem = GazeEstimator(args.gazeestimationmodel, args.device,
                        args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        gem.load_model()
        gem_load_time = time.time() - start_time
    else:
        gem.load_model()
    gem.check_model()

    if args.stats == 1:
        duration_loading = time.time() - start_time
        print(
            f"Duration for loading and checking the models: {duration_loading}"
        )
        log.info(
            f"Duration for loading and checking the models: {duration_loading}"
        )

    cv2.namedWindow('preview', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('preview', 600, 600)

    feed.load_data()
    for ret, frame in feed.next_batch():
        if not ret:
            break

        if frame is not None:
            frame_count += 1
            key = cv2.waitKey(60)

            if args.stats == 1:
                start_time = time.time()

            # Run face detection
            face_crop, face_coords = fdm.predict(frame.copy())
            print("Face crop shape: " + str(face_crop.shape))
            frame_h, frame_w = frame.shape[:2]
            (xmin, ymin, xmax, ymax) = face_coords
            face_frame = frame[ymin:ymax, xmin:xmax]
            #center_of_face = (xmin + face_frame.shape[1] / 2, ymin + face_frame.shape[0] / 2, 0) # 0 for colour channel
            #print("Center of face " + str(center_of_face))

            try:
                # Check if face was detected
                if type(face_coords) == int:
                    print("Unable to detect face")
                    if key == 27:
                        break
                    continue

                # Facial landmark detection
                left_eye_crop, right_eye_crop, landmarks, crop_coords = flm.predict(
                    face_crop.copy())
                #print("Landmarks" +str(landmarks))
                left_eye = (landmarks[0], landmarks[1])
                right_eye = (landmarks[2], landmarks[3])

                # Landmark position based on complete frame
                landmarks_viz = landmarks
                landmarks_viz[0] = landmarks_viz[0] + xmin
                landmarks_viz[1] = landmarks_viz[1] + ymin
                landmarks_viz[2] = landmarks_viz[2] + xmin
                landmarks_viz[3] = landmarks_viz[3] + ymin

                crop_coords_viz = (crop_coords[0] + xmin, crop_coords[1] +
                                   ymin, crop_coords[2] + xmin,
                                   crop_coords[3] + ymin, crop_coords[4] +
                                   xmin, crop_coords[5] + ymin,
                                   crop_coords[6] + xmin,
                                   crop_coords[7] + ymin)

                left_eye_viz = (landmarks_viz[0], landmarks_viz[1])
                right_eye_viz = (landmarks_viz[2], landmarks_viz[3])

                third_eye_viz_x = (landmarks_viz[2] -
                                   landmarks_viz[0]) / 2 + landmarks_viz[0]
                third_eye_viz_y = (landmarks_viz[3] -
                                   landmarks_viz[1]) / 2 + landmarks_viz[1]
                third_eye_viz = (third_eye_viz_x, third_eye_viz_y)
                #print(landmarks_viz[0], landmarks_viz[2], third_eye_viz_x)

                # Head pose estimation
                head_pose = hpm.predict(face_crop.copy())
                print("Head pose: " + str(head_pose))
                (yaw, pitch, roll) = head_pose
                frame = display_head_pose(frame, pitch, roll, yaw)

                # Send inputs to GazeEstimator
                gaze_vector = gem.predict(head_pose, left_eye_crop,
                                          right_eye_crop)

                if args.stats == 1:
                    inference_time = time.time() - start_time
                    inference_times.append(inference_time)

                print(gaze_vector)
                frame = display_gaze(frame, gaze_vector)

                # Control the mouse
                if frame_count % 5 == 0:
                    mouse_x, mouse_y = get_mouse_vector(gaze_vector, roll)
                    print("Mouse vector:" + str(mouse_x) + " - " +
                          str(mouse_y))
                    mouse.move(mouse_x, mouse_y)
                    currentMouseX, currentMouseY = pyautogui.position()
                    print("Mouse coordinates: " + str(currentMouseX) + ", " +
                          str(currentMouseY))

                if args.visual_flag == 1:

                    frame = draw_bounding_box(frame, face_coords)

                    left_eye_frame = crop_coords_viz[0:4]
                    right_eye_frame = crop_coords_viz[4:]
                    frame = draw_bounding_box(frame, left_eye_frame)
                    frame = draw_bounding_box(frame, right_eye_frame)

                    frame = visualize_landmark(frame, left_eye_viz)
                    frame = visualize_landmark(frame,
                                               right_eye_viz,
                                               color=(0, 0, 255))

                    frame = visualize_gaze(frame, gaze_vector, landmarks_viz)

                    # visualize the axes of the HeadPoseEstimator results
                    #frame = hpm.draw_axes(frame.copy(), center_of_face, yaw, pitch, roll, scale, focal_length)
                    frame = hpm.draw_axes(frame.copy(), third_eye_viz, yaw,
                                          pitch, roll, scale, focal_length)
                    #hdm.draw_axes(frame.copy(), center_of_face, yaw, pitch, roll, scale, focal_length)

                cv2.imshow('preview', frame)
                cv2.imshow('left eye', left_eye_crop)
                cv2.imshow('right eye', right_eye_crop)

            except Exception as e:
                print("Unable to predict using model" + str(e) +
                      " for frame " + str(frame_count))
                log.error("Unable to predict using model" + str(e) +
                          " for frame " + str(frame_count))
            continue

    if args.stats == 1:
        avg_inference_time = sum(inference_times) / len(inference_times)
        print("Average inference time: " + str(avg_inference_time))
        log.info("Average inference time: " + str(avg_inference_time))
        log.info("Load time for face detection model: " + str(fdm_load_time))
        log.info("Load time for facial landmarks model: " + str(flm_load_time))
        log.info("Load time for head pose detection model: " +
                 str(hpm_load_time))
        log.info("Load time for gaze estimation model: " + str(gem_load_time))
    cv2.destroyAllWindows()
    feed.close()
Exemplo n.º 21
0
def infer_on_video(args):
    args.ct = float(args.ct)
    input_file = args.i

    # Check if 'cam' or video file was chosen?
    if input_file.lower() == 'cam':
        i_feeder = InputFeeder(input_type='cam')
    else:
        if not os.path.isfile(input_file):
            log.error(
                "Wasn't able to find video file, please correct directory!")
            exit(1)
        i_feeder = InputFeeder(input_type='video', input_file=input_file)

    # Load image/frame of chosen medium
    i_feeder.load_data()

    # Initialize the Inference Engine for each model
    fd_plugin = Fd_Network()
    lr_plugin = Lr_Network()
    hp_plugin = Hp_Network()
    ge_plugin = Ge_Network()

    # Load the network models into the IE and get the net input shape
    start_load_time = time.time()
    fd_plugin.load_model(args.fdm, args.d)
    lr_plugin.load_model(args.lrm, args.d)
    hp_plugin.load_model(args.hpm, args.d)
    ge_plugin.load_model(args.gem, args.d)
    total_load_time = time.time() - start_load_time
    log.info("Time it took to load all models: " + str(total_load_time))

    mouse_controller = MouseController('medium', 'fast')

    # Get net input shape of models
    fd_net_input_shape = fd_plugin.get_input_shape()
    lr_net_input_shape = lr_plugin.get_input_shape()
    hp_net_input_shape = hp_plugin.get_input_shape()
    # Currently not used as it didn't return the needed shape correctly for gaze estimation
    # ge_net_input_shape = ge_plugin.get_input_shape()

    # frame_count for FPS calc and start_inf_time, to calc total inference time
    frame_count = 0
    start_inf_time = time.time()
    # Process frames until the video ends, or process is exited
    for ret, frame in i_feeder.next_batch():
        if not ret:
            break

        frame_count += 1
        key_pressed = cv2.waitKey(60)

        height, width = frame.shape[:2]

        ##### FACE-DETECTION #START#
        # Pre-process the frame
        fd_frame = preprocess_image(frame, fd_net_input_shape[3],
                                    fd_net_input_shape[2], "face-detection")

        # Perform inference on the frame
        fd_plugin.async_inference(fd_frame)

        # Get the output of inference
        if fd_plugin.wait() == 0:
            result = fd_plugin.extract_output()
            # Get frame with bounding box for face, a cropped version and it's coords
            cropped_face, coords_face = detect_face(frame, result, args, width,
                                                    height)
            ##### FACE-DETECTION #END#

            ##### LANDMARK REGRESSION MODEL #START#
            lr_frame = preprocess_image(cropped_face, lr_net_input_shape[3],
                                        lr_net_input_shape[2],
                                        "landmark-regression")
            lr_plugin.async_inference(lr_frame)

            if lr_plugin.wait() == 0:
                lr_result = lr_plugin.extract_output()
                l_eye_img, r_eye_img, eye_coords = preprocess_lr_output(
                    lr_result, cropped_face)
            ###### LANDMARK REGRESSION MODEL #END#

            ##### HEAD POSE MODEL #START#
            hp_frame = preprocess_image(cropped_face, hp_net_input_shape[3],
                                        hp_net_input_shape[2], "head-pose")
            hp_plugin.async_inference(hp_frame)

            if hp_plugin.wait() == 0:
                hp_result = hp_plugin.extract_output()
                hp_output = preprocess_hp_output(hp_result)
            ##### HEAD POSE MODEL #END#

            ##### GAZE AND MOUSE #START#
            # Hard-coded value because net-input-shape didn't return correctly for the gaze-estimation model
            p_l_eye_img = preprocess_image(l_eye_img, 60, 60,
                                           "gaze-estimation")
            p_r_eye_img = preprocess_image(r_eye_img, 60, 60,
                                           "gaze-estimation")
            # Perform inference on eye images and head pose output
            ge_plugin.async_inference(p_l_eye_img, p_r_eye_img, hp_output)

            if ge_plugin.wait() == 0:
                ge_result = ge_plugin.extract_output()
                mouse_coord, gaze_vector = preprocess_ge_output(
                    ge_result, hp_output)
            ##### GAZE AND MOUSE #END#

            # Draw on frame if at least one flag was entered via command line
            if len(args.flags) != 0:
                draw_results(frame, cropped_face, coords_face, l_eye_img,
                             r_eye_img, eye_coords, hp_output, gaze_vector,
                             args.flags, height)

            cv2.imshow("cropped", cropped_face)
            # cv2.imshow("Left Eye", l_eye_img)
            # cv2.imshow("Right Eye", r_eye_img)
            cv2.imshow("frame", frame)

            if frame_count % 5 == 0:
                mouse_controller.move(mouse_coord[0], mouse_coord[1])

        # Break if escape key pressed
        if key_pressed == 27:
            break

    total_inf_time = time.time() - start_inf_time
    fps = (frame_count / total_inf_time)
    log.info("Total-Inference-Time:" + str(total_inf_time))
    log.info("FPS: " + str(fps))
    # Release the capture and destroy any OpenCV windows
    i_feeder.close()
    cv2.destroyAllWindows()
def main(args):
    # set log level
    levels = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR
    }

    log_level = levels.get(args.log_level, logging.ERROR)

    logging.basicConfig(level=log_level)

    mouse_control = MouseController('high', 'fast')

    logging.info("Model Loading Please Wait ..")
    face_det = FaceDetection(args.face_detection, args.device)
    facial_det = FaceLandmark(args.face_landmark, args.device)
    head_pose_est = HeadPoseEstimation(args.head_pose, args.device)
    gaze_est = GazeEstimation(args.gaze_estimation, args.device)
    logging.info("Model loading successfully")

    inp = InputFeeder(input_type='video', input_file=args.input)
    inp.load_data()

    face_det.load_model()
    facial_det.load_model()
    head_pose_est.load_model()
    gaze_est.load_model()

    video_writer = cv2.VideoWriter(args.output_dir + '/demo_output11.mp4',
                                   cv2.VideoWriter_fourcc(*'MPEG'), 15,
                                   (1920, 1080), True)

    cv2.namedWindow('gaze')
    for frame in inp.next_batch():
        try:
            frame.shape
        except Exception as err:
            break
        crop_face, crop_coords = face_det.predict(frame,
                                                  visualize=args.visualize)

        left_eye, right_eye, left_eye_crop, right_eye_crop = facial_det.predict(
            crop_face, visualize=args.visualize)
        head_pose = head_pose_est.predict(crop_face, visualize=args.visualize)

        (new_x, new_y), gaze_vector = gaze_est.predict(left_eye_crop,
                                                       right_eye_crop,
                                                       head_pose)

        left_eye_gaze = int(left_eye[0] +
                            gaze_vector[0] * 100), int(left_eye[1] -
                                                       gaze_vector[1] * 100)
        right_eye_gaze = int(right_eye[0] +
                             gaze_vector[0] * 100), int(right_eye[1] -
                                                        gaze_vector[1] * 100)

        cv2.arrowedLine(crop_face, left_eye, left_eye_gaze, (0, 0, 255), 2)
        cv2.arrowedLine(crop_face, right_eye, right_eye_gaze, (0, 0, 255), 2)

        video_writer.write(frame)
        mouse_control.move(new_x, new_y)

        if args.show_result:
            cv2.imshow('gaze', frame)
            cv2.waitKey(1)

    inp.close()
    video_writer.release()
    cv2.destroyAllWindows()
def main():
	## calling argparser
	args = build_argparser().parse_args()
	# create a log file
	logging.basicConfig(filename='Project_log.log', level=logging.INFO)
	logger = logging.getLogger()

	## get args input variable 
	input_path = args.input
	## get args visualization flags
	visual_flags = args.flag_visualization

	## put all keys for visualization in dict
	Dict_visual_keys = {
		'args_face': 'fd',
		'args_land': 'fl',
		'args_head': 'hp',
		'args_gaze': 'ge',
		'args_crop': 'crop',
		'args_win': 'win'
	}


	## check if using CAMERA or video file or image
	if input_path == "CAM" or input_path=="cam":
		print("\n## You are using CAMERA right now..." + input_path + " detected!")
		logger.info("\n## You are using CAMERA right now..." + input_path + " detected!")
		feeder_in = InputFeeder(input_path.lower())
	else:
		## check if input file exists in given path
		if not os.path.isfile(input_path):
			print("\nInput file not exists in Path: " + input_path + ". Please check again !!!")
			logger.error("## Input file not exists in Path: " + input_path + ". Please check again !!!")
			exit(1)
		else:
			print('\nInput path exists: '+ input_path + '\n')
			logger.info('\nInput path exists: '+ input_path)
			feeder_in = InputFeeder("video", input_path)


	## handler for mouse moving by precision and speed
	mouse_handler = MouseController('medium', 'fast')

	## initialize 4 models
	model_fd, model_fld, model_hpe, model_ge = models_handler(logger, args)


	feeder_in.load_data()
	print("\n## Loaded Input Feeder ")
	logger.info("## Loaded Input Feeder ")

	## load face detection model
	model_fd_start_time = time.time()
	model_fd.load_model()
	model_fd_load_time = (time.time() - model_fd_start_time)*1000
	logger.info('FaceDetection load time: ' + str(round(model_fd_load_time, 3)) + ' ms')

	## load facial landmarks detection model
	model_fld_start_time = time.time()
	model_fld.load_model()
	model_fld_load_time = (time.time() - model_fld_start_time)*1000
	logger.info('FacialLandmarkDetection load time: ' + str(round(model_fld_load_time, 3)) + ' ms')

	## load head pose estimation model
	model_hpe_start_time = time.time()
	model_hpe.load_model()
	model_hpe_load_time = (time.time() - model_hpe_start_time)*1000
	logger.info('HeadPoseEstimation load time: ' + str(round(model_hpe_load_time, 3)) + ' ms')

	## load gaze estimation model
	model_ge_start_time = time.time()
	model_ge.load_model()
	model_ge_load_time, total_load_time = (time.time() - model_ge_start_time)*1000, (time.time() - model_fd_start_time)*1000
	logger.info('GazeEstimation load time: ' + str(round(model_ge_load_time, 3)) + ' ms')
	## Model load time in total 
	logger.info('Total Load time: ' + str(round(total_load_time, 3)) + ' ms')

	print('\n## All model successfully loaded!')
	logger.info('## All model successfully loaded!')

	frame_count = 0
	print("## Start inference on frame!")
	logger.info("## Start inference on frame!")
	

	## empty list for each model to accumulate infer time and later get average infer time
	fd_infer_time = []
	fld_infer_time = []
	hpe_infer_time = []
	ge_infer_time = []

	start_infer_time = time.time()
	## loop through each frame and start inference on each model
	for flag_return, frame in feeder_in.next_batch():
		# print(flag_return)
		if not flag_return:
			print('\nflag_return: ' + str(flag_return) + '. Video has reach to the end...')
			logger.error('flag_return: ' + str(flag_return) + '. Video has reach to the end...')
			break

		event_key = cv2.waitKey(60)
		## frame count add by 1
		frame_count += 1
		if args.show_info:
			print('\nNo. frame: {}'.format(frame_count))

		if event_key ==27:
			print("\nUser keyboard exit!....")
			break

		## Face detection ##
		t0 = time.time()
		cropped_face, face_coords = model_fd.predict(frame.copy(), args.prob_threshold, args.perf_counts)
		# print(cropped_face.shape)
		## face_coords 
		## top left, bottom right
		fd_infer_time.append((time.time() - t0)*1000)
		# print(fd_infer_time)
		if args.show_info:
			print("Average inference time of FaceDetection model: {} ms".format(np.average(np.asarray(fd_infer_time))))
		
		## if no face detected
		if len(face_coords)==0:
			print("## No Face detected...")
			logger.error("## No face detected. Please check once again!")
			continue
		
		## Landmarks detection ##
		t1 = time.time()
		l_eye_box, r_eye_box, eyes_coords = model_fld.predict(cropped_face.copy(), args.perf_counts)
		# print(l_eye_box.shape, r_eye_box.shape) # left eye and right eye image
		## [left eye box, right eye box] 
		## [[leye_xmin,leye_ymin,leye_xmax,leye_ymax], [reye_xmin,reye_ymin,reye_xmax,reye_ymax]]
		# print(eyes_coords)
		fld_infer_time.append((time.time()- t1)*1000)
		# print(fld_infer_time)
		if args.show_info:
			print("Average inference time of FacialLandmarkDetection model: {} ms".format(np.average(np.asarray(fld_infer_time))))
		
		
		## Head pose detection ##
		t2 = time.time()
		hpe_output = model_hpe.predict(cropped_face.copy(), args.perf_counts)
		# [6.927431583404541, -4.0265960693359375, -1.8397517204284668]
		# print(hpe_output) # yaw, pitch, roll
		hpe_infer_time.append((time.time() - t2)*1000)
		if args.show_info:
			print("Average inference time of HeadPoseEstimation model: {} ms".format(np.average(np.asarray(hpe_infer_time))))

		## Gaze estimation ##		
		t3 = time.time()
		mouse_position, gaze_vector = model_ge.predict(l_eye_box, r_eye_box, hpe_output, args.perf_counts)
		## mouse position (x, y), gaze_vector [-0.13984774, -0.38296703, -0.9055522 ]
		ge_infer_time.append((time.time() - t3)*1000)
		if args.show_info:
			print("Average inference time of GazeEstimation model: {} ms".format(np.average(np.asarray(ge_infer_time))))

		# print('@@@@@@@@@@@@@', len(visual_flags))
				
		## Visualize the result if visual_flags activated
		if len(visual_flags) > 0 and len(visual_flags) <= 6 and Dict_visual_keys['args_win'] in visual_flags:
			frame_copy = frame.copy()

			if Dict_visual_keys['args_face'] in visual_flags:
				# Face
				cv2.rectangle(frame_copy, (face_coords[0], face_coords[1]), (face_coords[2], face_coords[3]), (255,0,0), 2) 				
				
			if Dict_visual_keys['args_land'] in visual_flags:
				# Facial Landmark left right eyes
				cv2.rectangle(frame_copy, (face_coords[0] + eyes_coords[0][0], face_coords[1] + eyes_coords[0][1]), (face_coords[0]+eyes_coords[0][2], face_coords[1]+eyes_coords[0][3]),(255,255,255), 2)
				cv2.rectangle(frame_copy, (face_coords[0] + eyes_coords[1][0], face_coords[1] + eyes_coords[1][1]), (face_coords[0]+eyes_coords[1][2], face_coords[1]+eyes_coords[1][3]),(255,255,255), 2)				
			
			if Dict_visual_keys['args_crop'] in visual_flags:
				## cropped face with landmarks left and right eyes ##
				land_frame = cropped_face.copy()
				cv2.rectangle(land_frame, (eyes_coords[0][0], eyes_coords[0][1]), (eyes_coords[0][2],eyes_coords[0][3]),(0,255,0), 2)
				cv2.rectangle(land_frame, (eyes_coords[1][0], eyes_coords[1][1]), (eyes_coords[1][2],eyes_coords[1][3]),(0,255,0), 2)
				cv2.imshow('FacialLandmark', cv2.resize(land_frame, (300, 400)))

			if Dict_visual_keys['args_head'] in visual_flags:
				# Head Pose values
				cv2.putText(frame_copy, "Angles of Head Pose:", (10, 25), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 255), 2)
				cv2.putText(frame_copy, "Yaw: {:.2f}".format(hpe_output[0]), (10, 55), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 255), 2)
				cv2.putText(frame_copy, "Pitch: {:.2f}".format(hpe_output[1]), (10, 85), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 255), 2)
				cv2.putText(frame_copy, "Roll: {:.2f}".format(hpe_output[2]), (10, 115), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 255), 2)

			if Dict_visual_keys['args_gaze'] in visual_flags:
				# Gaze arrow left right eyes
				x, y = gaze_vector[0:2]
				len_add = 400
				## eye left center point (x, y)
				eye_left_center = (int(face_coords[0] + (eyes_coords[0][0]+eyes_coords[0][2])/2), int(face_coords[1] + (eyes_coords[0][1]+eyes_coords[1][3])/2))
				## eye right center point (x, y)
				eye_right_center = (int(face_coords[0] + (eyes_coords[1][0]+eyes_coords[1][2])/2), int(face_coords[1] + (eyes_coords[1][1]+eyes_coords[1][3])/2))			
				## draw arrow line for both gaze of eyes
				cv2.arrowedLine(frame_copy, eye_left_center, (int(eye_left_center[0]+x*len_add), int(eye_left_center[1]-y*len_add)), (0,0,255), 3)
				cv2.arrowedLine(frame_copy, eye_right_center, (int(eye_right_center[0]+x*len_add), int(eye_right_center[1]-y*len_add)), (0,0,255), 3)
			
			## if with '-show win' without model keys will only display normal video stream
			cv2.imshow('Visualization', cv2.resize(frame_copy, (800,700)))
		else:
			print("\n## No Visualization, Only information displaying... \n## If needs visualization please add '-show' with specific keys...")


		if frame_count % 4 == 0:
			## start move mouse each 4 frames
			mouse_handler.move(mouse_position[0], mouse_position[1])

	total_infer_time = time.time() - start_infer_time
	fps = frame_count / round(total_infer_time, 3)

	# print(args.show_info)
	if args.show_info:
		print('Total inference time: ' + str(round(total_infer_time*1000, 3)) + ' ms')
		print("Total frame: " + str(frame_count))
		print('FPS: ' + str(fps))

	## loggging into project log file
	# logger.info('Total inference time: ' + str(round(total_infer_time, 3)) + ' s')	
	logger.info("Average inference time of FaceDetection model: {} ms".format(np.average(np.asarray(fd_infer_time))))
	logger.info("Average inference time of FacialLandmarkDetection model: {} ms".format(np.average(np.asarray(fld_infer_time))))
	logger.info("Average inference time of HeadPoseEstimation model: {} ms".format(np.average(np.asarray(hpe_infer_time))))
	logger.info("Average inference time of GazeEstimation model: {} ms".format(np.average(np.asarray(ge_infer_time))))
	logger.info('Total inference time: ' + str(round(total_infer_time*1000, 3)) + ' ms')
	logger.info("Total frame: " + str(frame_count))
	logger.info('FPS: ' + str(fps))
	logger.error("### Camera Stream or Video Stream has reach to the end...###")

	cv2.destroyAllWindows()
	feeder_in.close()