Example #1
 def __init__(self):
     self.video_capture = cv2.VideoCapture(0)
     self.cam_w = int(self.video_capture.get(3))
     self.cam_h = int(self.video_capture.get(4))
     self.blinked = False
     #Defining the camera matrix.
     #To have better results it is necessary to find the focal
     # length of the camera. fx/fy are the focal lengths (in pixels)
     # and cx/cy are the optical centres. These values can be obtained
     # roughly by approximation, for example in a 640x480 camera:
     # cx = 640/2 = 320
     # cy = 480/2 = 240
     # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
     self.c_x = self.cam_w / 2
     self.c_y = self.cam_h / 2
     self.f_x = self.c_x / np.tan(60 / 2 * np.pi / 180)
     self.f_y = self.f_x
     self.camera_matrix = np.float32([[self.f_x, 0.0, self.c_x],
                                      [0.0, self.f_y, self.c_y],
                                      [0.0, 0.0, 1.0]])
     self.camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])
     self.landmarks_3D = np.float32([
         P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
         P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
         P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
         P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
     ])
     self.face_cascade = haarCascade(
         "/home/meet/Programming/python/eyetracker/haarcascade_frontalface_alt.xml",
         "/home/meet/Programming/python/eyetracker/haarcascade_profileface.xml"
     )
     self.face_detector = faceLandmarkDetection(
         '/home/meet/Programming/python/eyetracker/shape_predictor_68_face_landmarks.dat'
     )
     self.eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
     self.no_face_counter = 0
     self.face_x1 = 0
     self.face_y1 = 0
     self.face_x2 = 0
     self.face_y2 = 0
     self.face_w = 0
     self.face_h = 0
     self.roi_x1 = 0
     self.roi_y1 = 0
     self.roi_x2 = self.cam_w
     self.roi_y2 = self.cam_h
     self.roi_w = self.cam_w
     self.roi_h = self.cam_h
     self.roi_resize_w = int(self.cam_w / 10)
     self.roi_resize_h = int(self.cam_h / 10)
     self.face_type = -1
     self.no_eye_counter = 0
Example #2
def main():

    #Check if some arguments have been passed
    #pass the path of a video
    if (len(sys.argv) > 2):
        file_path = sys.argv[1]
        if (os.path.isfile(file_path) == False):
            print(
                "ex_pnp_head_pose_estimation: the file specified does not exist."
            )
            return
        else:
            #Open the video file
            video_capture = cv2.VideoCapture(file_path)
            if (video_capture.isOpened() == True):
                print(
                    "ex_pnp_head_pose_estimation: the video source has been opened correctly..."
                )
            # Define the codec and create VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'XVID')  #use cv2.cv.CV_FOURCC(*'XVID') on OpenCV 2.x
            output_path = sys.argv[2]
            out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280, 720))
    else:
        print(
            "You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi"
        )
        return

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    #camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
    #[         0.0, 603.55869786,  229.7537026],
    #[         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])

    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    #Declaring the two classifiers
    my_cascade = haarCascade("./etc/haarcascade_frontalface_alt.xml",
                             "./etc/haarcascade_profileface.xml")
    my_detector = faceLandmarkDetection(
        './etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w / 10)
    roi_resize_h = int(cam_h / 10)

    while (True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        if frame is None: break  #end of the video file
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2],
                            cv2.COLOR_BGR2GRAY)

        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting it
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft,
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray,
                            runFrontal=True,
                            runFrontalRotated=True,
                            runLeft=False,
                            runRight=False,
                            frontalScaleFactor=1.2,
                            rotatedFrontalScaleFactor=1.2,
                            leftScaleFactor=1.15,
                            rightScaleFactor=1.15,
                            minSizeX=80,
                            minSizeY=80,
                            rotationAngleCCW=30,
                            rotationAngleCW=-30,
                            lastFaceType=my_cascade.face_type)

        #Accumulate error values in a counter
        if (my_cascade.face_type == 0):
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if (no_face_counter == 30):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face has been returned
        if (my_cascade.face_type > 0 and my_cascade.face_type < 4):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #bounding box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if (my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10  #resize_rate + shift_rate
                face_margin_y1 = 20 + 5  #resize_rate + shift_rate
                face_margin_x2 = -20 - 10  #resize_rate + shift_rate
                face_margin_y2 = -20 + 5  #resize_rate + shift_rate
                face_margin_h = -0.7  #resize_factor
                face_margin_w = -0.7  #resize_factor
            #Adjusting the frame for profile right
            elif (my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            #Updating the ROI position
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if (roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if (roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if (roi_h > cam_h): roi_h = cam_h
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if (roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if (DEBUG == True):
                print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w,
                      face_h)
                print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if (text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1, text_y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                              (0, 255, 0), 2)

            #In case of a frontal/rotated face the
            # landmark detector is called
            if (my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(
                    frame,
                    face_x1,
                    face_y1,
                    face_x2,
                    face_y2,
                    points_to_return=TRACKED_POINTS)

                if (DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255),
                                   -1)

                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                                  camera_matrix,
                                                  camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec,
                                                camera_matrix,
                                                camera_distortion)

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()),
                         (0, 255, 0), 3)  #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()),
                         (255, 0, 0), 3)  #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()),
                         (0, 0, 255), 3)  #RED

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        #if(DEBUG == True):
        #text_x1 = roi_x1
        #text_y1 = roi_y1 - 3
        #if(text_y1 < 0): text_y1 = 0
        #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
        #cv2.rectangle(frame,
        #(roi_x1, roi_y1),
        #(roi_x2, roi_y2),
        #(0, 255, 255),
        #2)

        #Writing in the output file
        out.write(frame)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
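Every example above builds the intrinsic matrix with the same pinhole approximation described in the comments. As a quick standalone check of those numbers (a minimal sketch assuming the 640x480 frame and 60-degree horizontal field of view mentioned in the comments):

import numpy as np

cam_w, cam_h = 640, 480   #assumed frame size
fov_deg = 60.0            #assumed horizontal field of view

c_x = cam_w / 2.0                              #optical centre x = 320.0
c_y = cam_h / 2.0                              #optical centre y = 240.0
f_x = c_x / np.tan(np.radians(fov_deg / 2.0))  #focal length in pixels, about 554.26
f_y = f_x

camera_matrix = np.float32([[f_x, 0.0, c_x],
                            [0.0, f_y, c_y],
                            [0.0, 0.0, 1.0]])
print(camera_matrix)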
Example #3
def main():

    #Defining the video capture object
    video_capture = cv2.VideoCapture(0)

    if(video_capture.isOpened() == False):
        print("Error: the resource is busy or unavailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / np.tan(60/2 * np.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = np.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0] ])

    #print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = np.float32([[602.10618226,          0.0, 320.27333589],
                                   [         0.0, 603.55869786,  229.7537026],
                                   [         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    #camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    camera_distortion = np.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = np.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    my_cascade = haarCascade("/Users/lamh/Documents/deepgaze/etc/xml/haarcascade_frontalface_alt.xml", "/Users/lamh/Documents/deepgaze/etc/xml/haarcascade_profileface.xml")
    #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    my_detector = faceLandmarkDetection('/Users/lamh/Documents/deepgaze/etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w/10)
    roi_resize_h = int(cam_h/10)

    try:
        while(True):

            # Capture frame-by-frame
            ret, frame = video_capture.read()
            gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

            #Return code: 1=Frontal, 2=FrontRotLeft,
            # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
            my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type)

            #Accumulate error values in a counter
            if(my_cascade.face_type == 0):
                no_face_counter += 1

            #If no face is found for a certain
            #number of cycles, then the ROI is reset
            if(no_face_counter == 50):
                no_face_counter = 0
                roi_x1 = 0
                roi_y1 = 0
                roi_x2 = cam_w
                roi_y2 = cam_h
                roi_w = cam_w
                roi_h = cam_h

            #Checking which kind of face has been returned
            if(my_cascade.face_type > 0):

                #Face found, reset the error counter
                no_face_counter = 0

                #Because the dlib landmark detector wants a precise
                #bounding box of the face, it is necessary to resize
                #the box returned by the OpenCV haar detector.
                #Adjusting the frame for profile left
                if(my_cascade.face_type == 4):
                    face_margin_x1 = 20 - 10 #resize_rate + shift_rate
                    face_margin_y1 = 20 + 5 #resize_rate + shift_rate
                    face_margin_x2 = -20 - 10 #resize_rate + shift_rate
                    face_margin_y2 = -20 + 5 #resize_rate + shift_rate
                    face_margin_h = -0.7 #resize_factor
                    face_margin_w = -0.7 #resize_factor
                #Adjusting the frame for profile right
                elif(my_cascade.face_type == 5):
                    face_margin_x1 = 20 + 10
                    face_margin_y1 = 20 + 5
                    face_margin_x2 = -20 + 10
                    face_margin_y2 = -20 + 5
                    face_margin_h = -0.7
                    face_margin_w = -0.7
                #No adjustments
                else:
                    face_margin_x1 = 0
                    face_margin_y1 = 0
                    face_margin_x2 = 0
                    face_margin_y2 = 0
                    face_margin_h = 0
                    face_margin_w = 0

                #Updating the face position
                face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
                face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
                face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
                face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
                face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
                face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

                #Updating the ROI position
                roi_x1 = face_x1 - roi_resize_w
                if (roi_x1 < 0): roi_x1 = 0
                roi_y1 = face_y1 - roi_resize_h
                if(roi_y1 < 0): roi_y1 = 0
                roi_w = face_w + roi_resize_w + roi_resize_w
                if(roi_w > cam_w): roi_w = cam_w
                roi_h = face_h + roi_resize_h + roi_resize_h
                if(roi_h > cam_h): roi_h = cam_h
                roi_x2 = face_x2 + roi_resize_w
                if (roi_x2 > cam_w): roi_x2 = cam_w
                roi_y2 = face_y2 + roi_resize_h
                if(roi_y2 > cam_h): roi_y2 = cam_h

                #Debugging printing utilities
                #if(DEBUG == True):
                if(DEBUG == False):
                    #print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h)
                    #print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                    #Drawing a green rectangle
                    # (and text) around the face.
                    text_x1 = face_x1
                    text_y1 = face_y1 - 3
                    if(text_y1 < 0): text_y1 = 0
                    cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                    cv2.rectangle(frame,
                                 (face_x1, face_y1),
                                 (face_x2, face_y2),
                                 (0, 255, 0),
                                  2)

                # In case of a frontal/rotated face the
                # landmark detector is called
                if(my_cascade.face_type > 0):
                    landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)

                    if(DEBUG == True):
                        #cv2.drawKeypoints(frame, landmarks_2D)

                        for point in landmarks_2D:
                            cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


                    # Applying the PnP solver to find the 3D pose
                    # of the head from the 2D position of the
                    # landmarks.
                    # retval - bool
                    # rvec - Output rotation vector that, together with tvec, brings
                    # points from the model coordinate system to the camera coordinate system.
                    # tvec - Output translation vector.
                    retval, rvec, tvec = cv2.solvePnP(landmarks_3D,
                                                      landmarks_2D,
                                                      camera_matrix, camera_distortion)

                    # Now we project the 3D points into the image plane
                    # Creating a 3-axis to be used as reference in the image.
                    axis = np.float32([[50,0,0],
                                          [0,50,0],
                                          [0,0,50]])
                    imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)

                    # Drawing the three axis on the image frame.
                    selection_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                    print("*** Position: {0}, pitch, yaw, roll: {1}, {2}, {3} ***".format(selection_xy, np.degrees(rvec)[0], np.degrees(rvec)[1], np.degrees(rvec)[2]))
                    if(max(imgpts[1].ravel()) <= 500000 and min(imgpts[1].ravel()) >= -500000):
                        cv2.line(frame, selection_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN

                    if(max(imgpts[2].ravel()) <= 500000 and min(imgpts[2].ravel()) >= -500000):
                        cv2.line(frame, selection_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE

                    if(max(imgpts[0].ravel()) <= 500000 and min(imgpts[0].ravel()) >= -500000):
                        cv2.line(frame, selection_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED

            #Drawing a yellow rectangle
            # (and text) around the ROI.
            if(DEBUG == True):
                text_x1 = roi_x1
                text_y1 = roi_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
                #cv2.rectangle(frame,
                #             (roi_x1, roi_y1),
                #             (roi_x2, roi_y2),
                #             (0, 255, 255),
                #             2)

            #Showing the frame and waiting
            # for the exit command
            cv2.imshow('Video', frame)
            key = cv2.waitKey(1)
            if key == 27:
                cv2.destroyAllWindows()
                break
            #if cv2.waitKey(1) & 0xFF == ord('q'): break
    except KeyboardInterrupt:
        #Release the camera
        video_capture.release()
        print("KeyboardInterrupt: camera released. Bye ...")
Example #4
#The MIT License (MIT)
#Copyright (c) 2016 Massimiliano Patacchiola
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#Using the OpenCV haar cascade classifiers to find a face in an image.


from deepgaze.haar_cascade import haarCascade
import cv2

#Declaring the face detector object and loading the XML config file
my_cascade = haarCascade("../../etc/xml/haarcascade_frontalface_alt.xml", "../../etc/xml/haarcascade_profileface.xml")

#Reading the image in black/white (grayscale)
image = cv2.imread("./bellucci.jpg",0)

#Calling the findFace method
my_cascade.findFace(image, runFrontal=True, runFrontalRotated=True, 
                    runLeft=True, runRight=True, 
                    frontalScaleFactor=1.2, rotatedFrontalScaleFactor=1.2, 
                    leftScaleFactor=1.15, rightScaleFactor=1.15, 
                    minSizeX=64, minSizeY=64, 
                    rotationAngleCCW=30, rotationAngleCW=-30)

#The coords of the face are saved in the class object
face_x1 = my_cascade.face_x
face_y1 = my_cascade.face_y
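A possible follow-up, not part of the original snippet: when face_type is greater than zero a face was found, and the detected box can be drawn using the same haarCascade fields the other examples read (face_x, face_y, face_w, face_h). A hedged sketch, with a hypothetical output path:

if my_cascade.face_type > 0:
    face_x2 = my_cascade.face_x + my_cascade.face_w
    face_y2 = my_cascade.face_y + my_cascade.face_h
    #Drawing a white rectangle on the grayscale image and saving the result
    cv2.rectangle(image, (face_x1, face_y1), (face_x2, face_y2), (255, 255, 255), 2)
    cv2.imwrite("./bellucci_face.jpg", image)  #hypothetical output file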
Example #5
import cv2
import numpy as np
import tensorflow as tf
from deepgaze.head_pose_estimation import CnnHeadPoseEstimator
from deepgaze.haar_cascade import haarCascade

face_cascade = haarCascade(
    "/home/meet/Programming/python/eyetracker/haarcascade_frontalface_alt.xml",
    "/home/meet/Programming/python/eyetracker/haarcascade_profileface.xml")
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
cam = cv2.VideoCapture(0)
cam_w = int(cam.get(3))
cam_h = int(cam.get(4))
no_face_counter = 0
face_x0 = 0
face_y0 = 0
face_x1 = 0
face_y1 = 0
face_w = 0
face_h = 0
roi_x0 = 0
roi_y0 = 0
roi_x1 = cam_w
roi_y1 = cam_h
roi_w = cam_w
roi_h = cam_h
roi_resize_w = int(cam_w / 10)
roi_resize_h = int(cam_h / 10)


def detect_eyes(img, gray_img):
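The snippet stops at the function signature. A hypothetical sketch of what such a helper could do with the eye_cascade classifier loaded above (body, parameters and drawing are illustrative, not the original implementation):

def detect_eyes_sketch(img, gray_img):
    #Run the Haar eye cascade on the grayscale frame (illustrative parameters)
    eyes = eye_cascade.detectMultiScale(gray_img, scaleFactor=1.1,
                                        minNeighbors=5, minSize=(20, 20))
    #Mark each detected eye on the colour frame
    for (x, y, w, h) in eyes:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 255), 2)
    return eyes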
Example #6
def main():
    #counter for frames in which the gaze is not frontal
    COUNTER = 0
    #alarm for drowsiness
    ALARM_ON = False

    #Defining the video capture object
    video_capture = cv2.VideoCapture("head_pose4.mp4")

    if (video_capture.isOpened() == False):
        print("Error: the resource is busy or unavailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera (knowing the focal length helps).
    # fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589],
                                   [0.0, 603.55869786, 229.7537026],
                                   [0.0, 0.0, 1.0]])

    #Distortion coefficients
    #camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    camera_distortion = numpy.float32(
        [0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263])

    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    #Declaring the two classifiers
    my_cascade = haarCascade("./etc/xml/haarcascade_frontalface_alt.xml",
                             "./etc/xml/haarcascade_profileface.xml")
    #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    my_detector = faceLandmarkDetection(
        './etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w / 10)
    roi_resize_h = int(cam_h / 10)

    while (True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        if frame is None: break  #end of the video file
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2],
                            cv2.COLOR_BGR2GRAY)

        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting it
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft,
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray,
                            True,
                            True,
                            True,
                            True,
                            1.10,
                            1.10,
                            1.15,
                            1.15,
                            40,
                            40,
                            rotationAngleCCW=30,
                            rotationAngleCW=-30,
                            lastFaceType=my_cascade.face_type)

        #Accumulate error values in a counter
        if (my_cascade.face_type == 0):
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if (no_face_counter == 50):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face has been returned
        if (my_cascade.face_type > 0):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #bounding box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if (my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10  #resize_rate + shift_rate
                face_margin_y1 = 20 + 5  #resize_rate + shift_rate
                face_margin_x2 = -20 - 10  #resize_rate + shift_rate
                face_margin_y2 = -20 + 5  #resize_rate + shift_rate
                face_margin_h = -0.7  #resize_factor
                face_margin_w = -0.7  #resize_factor
            #Adjusting the frame for profile right
            elif (my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            #Updating the ROI position
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if (roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if (roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if (roi_h > cam_h): roi_h = cam_h
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if (roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if (DEBUG == True):
                print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w,
                      face_h)
                print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if (text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1, text_y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                              (0, 255, 0), 2)

            #In case of a frontal/rotated face the
            # landmark detector is called
            if (my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(
                    frame,
                    face_x1,
                    face_y1,
                    face_x2,
                    face_y2,
                    points_to_return=TRACKED_POINTS)

                if (DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255),
                                   -1)

                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                                  camera_matrix,
                                                  camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec,
                                                camera_matrix,
                                                camera_distortion)

                #Compute rotate_degree
                modelpts, jac2 = cv2.projectPoints(landmarks_3D, rvec, tvec,
                                                   camera_matrix,
                                                   camera_distortion)
                rvec_matrix = cv2.Rodrigues(rvec)[0]

                proj_matrix = numpy.hstack((rvec_matrix, tvec))
                eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6]

                pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]

                roll = math.degrees(math.asin(math.sin(roll)))
                print(str(roll))
                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()),
                         (0, 255, 0), 3)  #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()),
                         (255, 0, 0), 3)  #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()),
                         (0, 0, 255), 3)  #RED

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        if (DEBUG == True):
            text_x1 = roi_x1
            text_y1 = roi_y1 - 3
            if (text_y1 < 0): text_y1 = 0
            cv2.putText(frame, "ROI", (text_x1, text_y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2),
                          (0, 255, 255), 2)

            if (roll <= -40.0):
                COUNTER = COUNTER + 1
                cv2.putText(frame, "Not frontal-gazing", (10, cam_h - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
                print("counter: {}".format(COUNTER))

                if (COUNTER >= 20):
                    if not ALARM_ON:
                        ALARM_ON = True

                    cv2.putText(frame, "DROWSINESS ALERT!", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                COUNTER = 0
                ALARM_ON = False

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
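The Euler-angle extraction used inside the loop above (cv2.Rodrigues followed by cv2.decomposeProjectionMatrix) can be collected into a small standalone helper. This is a simplified restatement of those steps; the function name is illustrative and the roll normalisation from the example is omitted:

import cv2
import numpy as np

def head_pose_angles(rvec, tvec):
    #Convert the solvePnP rotation vector into a 3x3 rotation matrix
    rotation_matrix, _ = cv2.Rodrigues(rvec)
    #Build a 3x4 projection matrix [R|t] and decompose it
    projection_matrix = np.hstack((rotation_matrix, tvec.reshape(3, 1)))
    #decomposeProjectionMatrix returns the Euler angles (in degrees) as its 7th output
    euler_angles = cv2.decomposeProjectionMatrix(projection_matrix)[6]
    pitch, yaw, roll = [float(a) for a in euler_angles.flatten()]
    return pitch, yaw, roll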
Example #7
def main():
    # The state machine has the following states:
    # [VOID, INIT, FIND, SHOW, KEY, ICUB, WHICH, QUIT]
    #
    # VOID: Empty state, to use for test
    # INIT: It is called only once for the initialisation
    # FIND: use the hardware face and landmark detection libraries
    # SHOW: Print the image on screen using OpenCV
    # KEY: Check which key is pressed
    # ICUB: Pressing the (h) button the robot looks in front of itself
    # WHICH: Pressing the (w) button is like asking the robot to look at an object on the table
    # QUIT: Pressing (q) unsubscribes and closes the script

    #Configuration Variables, adjust to taste
    #ICUB_IP = "192.168.0.100"
    #ICUB_PORT = 9559
    RECORD_VIDEO = True #If True record a video from the ICUB camera
    USE_FESTIVAL_TTS = True #To use the Festival Text To Speech
    #If you want to use Acapela TTS you have to fill the
    #following variable with the correct values
    #USE_ACAPELA_TTS = False
    #ACCOUNT_LOGIN = '******'
    #APPLICATION_LOGIN = '******'
    #APPLICATION_PASSWORD = '******'
    #SERVICE_URL = 'http://vaas.acapela-group.com/Services/Synthesizer'
    #The initial state
    STATE = "VOID" 

    while(True):

        #Empty STATE, to be used for test
        if(STATE == "VOID"):            
            STATE = "INIT"

        # The zero state is an init phase
        # In this state all the proxies are
        # created and the ICUB subscribes to the services
        #
        elif(STATE=="INIT"):
            #Init some generic variables
            #This counter allows continuing the program flow
            #without calling a sleep
            which_counter = 0 #Counter increased when WHICH is called
            which_counter_limit = 30 #Limit for the which counter

            #Init YARP
            print("[STATE " + str(STATE) + "] " + "YARP init" + "\n")
            yarp.Network.init()
            
            #Camera connection
            try:
                print("[STATE " + str(STATE) + "] " + "Waiting for '/icubSim/cam/left' ..." + "\n")
                cam_w = 320 #640
                cam_h = 240 #480
                input_port = yarp.Port()
                input_port.open("/pyera-image-port")
                yarp.Network.connect("/icubSim/cam/left", "/pyera-image-port")
                img_array = np.zeros((cam_h, cam_w, 3), dtype=np.uint8)
                yarp_image = yarp.ImageRgb()
                yarp_image.resize(cam_w, cam_h)
                yarp_image.setExternal(img_array, img_array.shape[1], img_array.shape[0])
            except BaseException as err:
                print("[ERROR] connect To Camera catching error " + str(err))
                return

            #Head connection and Reset
            print("[STATE " + str(STATE) + "] " + "Waiting for '/icubSim/head/rpc:i' ..." + "\n")
            rpc_client = yarp.RpcClient()
            rpc_client.addOutput("/icubSim/head/rpc:i")
            print("[STATE " + str(STATE) + "] " + "Reset the head position..." + "\n")
            set_icub_head_pose(rpc_client, roll=0, pitch=0, yaw=0)

            #Initialise the OpenCV video recorder
            if(RECORD_VIDEO == True):
                print("[STATE " + str(STATE) + "] " + "Starting the video recorder..." + "\n")
                fourcc = cv2.cv.CV_FOURCC(*'XVID')
                video_out = cv2.VideoWriter("./output.avi", fourcc, 20.0, (cam_w,cam_h))

            #Init dlib Face detector
            #my_face_detector = dlib.get_frontal_face_detector()

            #Init the Deepgaze face detector
            my_cascade = haarCascade("./haarcascade_frontalface_alt.xml", "./haarcascade_profileface.xml")

            #Talking            
            if(USE_FESTIVAL_TTS == True): 
                print("[STATE " + str(STATE) + "] " + "Trying the TTS Festival..." + "\n")
                say_something("Hello World, I'm ready!")

            #if(USE_ACAPELA_TTS == True):                
                #print("[ACAPELA] Downloading the mp3 file...")
                #tts_acapela = acapela.Acapela(account_login=ACCOUNT_LOGIN, application_login=APPLICATION_LOGIN, application_password=APPLICATION_PASSWORD, 
                                              #service_url=SERVICE_URL, quality='22k', directory='/tmp/')    
                #tts_acapela.prepare(text="Hello world, I'm ready!", lang='US', gender='M', intonation='NORMAL')
                #output_filename = tts_acapela.run()
                #subprocess.Popen(["play","-q","/tmp/" + str(output_filename)])
                #print "[ACAPELA] Recorded TTS to %s" % output_filename           

            #Switch to the next state
            print("[STATE " + str(STATE) + "] " + "Switching to next state" + "\n")
            time.sleep(2)
            STATE = "FIND"

        # Get data from landmark detection and 
        # face detection.
        #
        elif(STATE=="FIND"):

            #Get Data for Face Detection
            #if(face_data and isinstance(face_data, list) and len(face_data) > 0):
                #print("[ICUB] Face detected!")
            #else:
                 #print("No face detected...")
                 #is_face_detected = False
            input_port.read(yarp_image)

            '''
            faces_array = my_face_detector(img_array, 1)
            print("Total Faces: " + str(len(faces_array)))
            for i, pos in enumerate(faces_array):

                face_x1 = pos.left()
                face_y1 = pos.top()
                face_x2 = pos.right()
                face_y2 = pos.bottom()
                text_x1 = face_x1
                text_y1 = face_y1 - 3

                cv2.putText(img_array, "FACE " + str(i+1), (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                cv2.rectangle(img_array, 
                         (face_x1, face_y1), 
                         (face_x2, face_y2), 
                         (0, 255, 0), 
                          2)
            '''
            gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
            # Return code: 1=Frontal, 2=FrontRotLeft, 3=FrontRotRight,
            #              4=ProfileLeft, 5=ProfileRight.
            my_cascade.findFace(gray, runFrontal=True, runFrontalRotated=False, runLeft=True, runRight=True, 
                                frontalScaleFactor=1.18, rotatedFrontalScaleFactor=1.18, leftScaleFactor=1.18, 
                                rightScaleFactor=1.18, minSizeX=70, minSizeY=70, rotationAngleCCW=30, 
                                rotationAngleCW=-30, lastFaceType=my_cascade.face_type)   

            face_x1 = my_cascade.face_x 
            face_y1 = my_cascade.face_y 
            face_x2 = my_cascade.face_x + my_cascade.face_w 
            face_y2 = my_cascade.face_y + my_cascade.face_h
            text_x1 = face_x1
            text_y1 = face_y1 - 3
            if(my_cascade.face_type == 1 or my_cascade.face_type == 2 or my_cascade.face_type == 3): cv2.putText(img_array, "FRONTAL", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
            elif(my_cascade.face_type == 4): cv2.putText(img_array, "LEFT", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
            elif(my_cascade.face_type == 5): cv2.putText(img_array, "RIGHT", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
            cv2.rectangle(img_array, 
                             (face_x1, face_y1), 
                             (face_x2, face_y2), 
                             (0, 255, 0),
                              2)

            is_face_detected = False
            STATE = "SHOW"
Example #8
def main():

    #Check if some arguments have been passed
    #pass the path of a video
    if(len(sys.argv) > 2):
        file_path = sys.argv[1]
        if(os.path.isfile(file_path)==False): 
            print("ex_pnp_head_pose_estimation: the file specified does not exist.")
            return
        else:
            #Open the video file
            video_capture = cv2.VideoCapture(file_path)
            if(video_capture.isOpened() == True): print("ex_pnp_head_pose_estimation: the video source has been opened correctly...")
            # Define the codec and create VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'XVID')  #use cv2.cv.CV_FOURCC(*'XVID') on OpenCV 2.x
            output_path = sys.argv[2]
            out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280,720))
    else:
        print("You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi")
        return


    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained 
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60/2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y], 
                                   [0.0, 0.0, 1.0] ])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    #camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
                                   #[         0.0, 603.55869786,  229.7537026], 
                                   #[         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    my_cascade = haarCascade("../etc/xml/haarcascade_frontalface_alt.xml", "../etc/xml/haarcascade_profileface.xml")
    my_detector = faceLandmarkDetection('../etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w/10)
    roi_resize_h = int(cam_h/10)

    while(True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        if frame is None: return
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting it
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft, 
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray, runFrontal=True, runFrontalRotated=True, runLeft=False, runRight=False, frontalScaleFactor=1.2, rotatedFrontalScaleFactor=1.2, leftScaleFactor=1.15, rightScaleFactor=1.15, minSizeX=80, minSizeY=80, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type)

        #Accumulate error values in a counter
        if(my_cascade.face_type == 0): 
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if(no_face_counter == 30):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face has been returned
        if(my_cascade.face_type > 0 and my_cascade.face_type < 4):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #bounding box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if(my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10 #resize_rate + shift_rate
                face_margin_y1 = 20 + 5 #resize_rate + shift_rate
                face_margin_x2 = -20 - 10 #resize_rate + shift_rate
                face_margin_y2 = -20 + 5 #resize_rate + shift_rate
                face_margin_h = -0.7 #resize_factor
                face_margin_w = -0.7 #resize_factor
            #Adjusting the frame for profile right
            elif(my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            #Updating the ROI position       
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if(roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if(roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if(roi_h > cam_h): roi_h = cam_h    
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if(roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if(DEBUG == True):
                print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h)
                print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                cv2.rectangle(frame, 
                             (face_x1, face_y1), 
                             (face_x2, face_y2), 
                             (0, 255, 0),
                              2)

            #In case of a frontal/rotated face the
            # landmark detector is called
            if(my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)

                if(DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings 
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, 
                                                  landmarks_2D, 
                                                  camera_matrix, camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50,0,0], 
                                      [0,50,0], 
                                      [0,0,50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as: 
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
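                #Index 7 corresponds to the sellion: P3D_SELLION is the eighth
                # entry of landmarks_3D, and landmarks_2D follows the same
                # ordering (this pairing is what solvePnP relies on above).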
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED
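
                #Optional illustration (not part of the original example): the
                # rvec returned by solvePnP can be converted to a 3x3 rotation
                # matrix with cv2.Rodrigues and then to rotation angles about
                # the x/y/z axes. This is only a sketch; the mapping of these
                # axes onto roll/pitch/yaw depends on the model coordinate
                # frame and is left unspecified here. Printed in DEBUG mode.
                if(DEBUG == True):
                    rmat, _ = cv2.Rodrigues(rvec)
                    rot_x = numpy.degrees(numpy.arctan2(rmat[2,1], rmat[2,2]))
                    rot_y = numpy.degrees(numpy.arctan2(-rmat[2,0], numpy.sqrt(rmat[2,1]**2 + rmat[2,2]**2)))
                    rot_z = numpy.degrees(numpy.arctan2(rmat[1,0], rmat[0,0]))
                    print("PnP rotation sketch [x, y, z] deg: ", rot_x, rot_y, rot_z)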

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        #if(DEBUG == True):
            #text_x1 = roi_x1
            #text_y1 = roi_y1 - 3
            #if(text_y1 < 0): text_y1 = 0
            #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
            #cv2.rectangle(frame, 
                         #(roi_x1, roi_y1), 
                         #(roi_x2, roi_y2), 
                         #(0, 255, 255),
                         #2)

        #Writing the frame to the output file
        out.write(frame)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
   
    #Release the camera and finalize the output video
    video_capture.release()
    out.release()
    print("Bye...")
Exemplo n.º 9
0
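#The eye_aspect_ratio() helper used below is not defined in this listing; the
# function sketched here is only an assumed, minimal version following the
# common EAR definition (mean of the two vertical eye distances divided by the
# horizontal one). It also assumes scipy is available.
from scipy.spatial import distance as dist

def eye_aspect_ratio(eye):
    #eye holds the six (x, y) landmark points of one eye
    A = dist.euclidean(eye[1], eye[5])  #vertical distance
    B = dist.euclidean(eye[2], eye[4])  #vertical distance
    C = dist.euclidean(eye[0], eye[3])  #horizontal distance
    return (A + B) / (2.0 * C)
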
def main():
    #Defining the video capture object
    video_capture = cv2.VideoCapture(1)
    thresh = 0.25
    frame_check = 15
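    #thresh is the eye aspect ratio (EAR) below which the eye is treated as
    # closed; frame_check is the number of consecutive low-EAR frames needed
    # before the drowsiness warning is drawn.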
    detect = dlib.get_frontal_face_detector()
    predict = dlib.shape_predictor("/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat")# change to path where landmark points are stored

    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_68_IDXS["left_eye"]   # get the left eye index
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_68_IDXS["right_eye"]  # get the right eye index
    flag=0

    sess = tf.Session()
    my_head_pose_estimator = CnnHeadPoseEstimator(sess)
    my_head_pose_estimator.load_roll_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/roll/cnn_cccdd_30k.tf"))# change to deepgaze directory path
    my_head_pose_estimator.load_pitch_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/pitch/cnn_cccdd_30k.tf"))
    my_head_pose_estimator.load_yaw_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/yaw/cnn_cccdd_30k.tf"))


    #Start of Eye Gaze Tracking
    win = dlib.image_window()
    
    predictor_path = "/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat"
    roi = []
    ref_point = 0 
    index1 = 0
    pt_lefteye_corner_x= 0
    pt_lefteye_corner_y = 0
    pt_pos1 = 0
    predictor = dlib.shape_predictor(predictor_path)
    pt_x2 =0
    pt_y2 = 0
    pt_x1 = 0
    pt_y1 = 0
    pt_actualx = 0
    pt_actualy = 0
    detector = dlib.get_frontal_face_detector()
    flag = 0
    flag1 = 0
    pt_righteye_corner_x = 0
    pt_righteye_corner_y = 0


    if(video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60/2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0] ])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
                                   [         0.0, 603.55869786,  229.7537026],
                                   [         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    #camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])
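
    #Sketch (not in the original): values like the matrix and distortion
    # coefficients above can be estimated with cv2.calibrateCamera from a set
    # of chessboard images; the path and the 9x6 inner-corner pattern below
    # are only assumptions.
    #
    #   import glob
    #   objp = numpy.zeros((9 * 6, 3), numpy.float32)
    #   objp[:, :2] = numpy.mgrid[0:9, 0:6].T.reshape(-1, 2)
    #   obj_points, img_points = [], []
    #   for fname in glob.glob("./calib/*.jpg"):
    #       gray_cal = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2GRAY)
    #       found, corners = cv2.findChessboardCorners(gray_cal, (9, 6))
    #       if found:
    #           obj_points.append(objp)
    #           img_points.append(corners)
    #   _, mtx, dist_coeffs, _, _ = cv2.calibrateCamera(
    #       obj_points, img_points, gray_cal.shape[::-1], None, None)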


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # of the human head.
    landmarks_3D = numpy.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    my_cascade = haarCascade("/home/agopinath1996/git_ws/deepgaze/etc/xml/haarcascade_frontalface_alt.xml", "/home/agopinath1996/git_ws/deepgaze/etc/xml/haarcascade_profileface.xml")
    #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    my_detector = faceLandmarkDetection('/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat')





    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w/10)
    roi_resize_h = int(cam_h/10)

    while(True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        ret, frame_eye = video_capture.read()
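        #Two consecutive frames are grabbed: frame feeds the head pose and
        # drowsiness checks, frame_eye feeds the eye-gaze pipeline.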

        #print("Estimated [roll, pitch, yaw] ..... [" + str(roll[0,0,0]) + "," + str(pitch[0,0,0]) + "," + str(yaw[0,0,0])  + "]")
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        img = cv2.cvtColor(frame_eye, cv2.COLOR_RGB2BGR) #for eye gaze detection
        drowsyframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        subjects = detect(drowsyframe, 0)
        for subject in subjects:
            shape = predict(frame,subject)
            shape = face_utils.shape_to_np(shape)
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            ear = (leftEAR + rightEAR) / 2.0
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            cv2.drawContours(frame, [leftEyeHull], -1, (0,255,0),1)
            cv2.drawContours(frame, [rightEyeHull], -1, (0,255,0), 1)
            if ear<thresh:
                flag+= 1
                print(flag)
                if flag >= frame_check:
                    cv2.putText(frame, "WAKEUPPPP", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)
                    cv2.putText(frame, "WAKEUPPPP", (10, 325), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255),2)
            else:
                flag=0

        #Eye Gaze Detection
        dets = detect(img, 0)
        check = 5
        shapes_eye = []
        for k,d in enumerate(dets):
            #print("dets{}".format(d))
            #print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(k, d.left(), d.top(), d.right(), d.bottom()))

            shape_eye = predict(img, d)
            shapes_eye.append(shape_eye)

            for index, pt in enumerate(shape_eye.parts()):
                #print('Part {}: {}'.format(index, pt))
                pt_pos = (pt.x, pt.y)
                cv2.circle(img, pt_pos, 1, (0,225, 0), 2)
                if index == 29:
                    pt_x2 = int(pt.x)
                    pt_y2 = int(pt.y)
                if index == 18:
                    pt_x1 = int(pt.x)
                    pt_y1 = int(pt.y)
                if index == 37:
                    pt_righteye_corner_x = pt.x
                    pt_righteye_corner_y = pt.y
                if index == 40:
                    pt_lefteye_corner_x = pt.x
                    pt_lefteye_corner_y = pt.y
                roi = frame_eye[pt_y1:pt_y2, pt_x1:pt_x2]
                roi_gray = cv2.cvtColor(roi,cv2.COLOR_RGB2GRAY)
                _, threshold = cv2.threshold(roi_gray, 30, 255, cv2.THRESH_BINARY_INV)
                try:
                    M = cv2.moments(threshold)
                    #print(M)
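                    #The pupil centre is estimated as the centroid of the dark
                    # thresholded blob: first-order moments divided by the
                    # zero-order moment (m00 can be zero, hence the try/except).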
                    cX = int(M["m10"]/M["m00"])
                    cY = int(M["m01"]/M["m00"])
                    #print(cX,cY)
                    pt_actualx = pt_x1+cX
                    pt_actualy = pt_y1+cY
                    #print(pt_actualx,pt_actualy)
                    diff_right = pt_actualx-pt_righteye_corner_x
                    diff_left = pt_lefteye_corner_x - pt_actualx
                    print(diff_right,diff_left)
                    #print(cX,cY)
                    if diff_right < 3:
                        cv2.putText(frame,'Look straight!',(10,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(10,10,255),2)
                    if diff_left <3:
                        cv2.putText(frame,'Look straight!',(10,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(10,10,255),2)


                except:
                    pass
                cv2.circle(frame,(pt_actualx,pt_actualy), 2,(255,0,255),-1)
                #print(pt_actualx,pt_actualy)


            #print(pt_x1,pt_x2,pt_y1,pt_y2)
            #print(roi.shape_eye)
            #print(img.shape_eye)
            try:
                cv2.imshow("threshold", threshold)
                cv2.waitKey(1)
            except:
                pass

        win.clear_overlay()
        win.set_image(img)
        if len(shapes_eye) != 0:
            for i in range(len(shapes_eye)):
                win.add_overlay(shapes_eye[i])





        #Looking for faces with the cascade.
        #The classifier scans the ROI starting
        #from a minimum window size and enlarging it
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft,
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type)
        #print(returnvalue)
        #Accumulate error values in a counter
        if(my_cascade.face_type == 0):
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if(no_face_counter == 50):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face has been returned
        if(my_cascade.face_type > 0):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #bounding box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if(my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10 #resize_rate + shift_rate
                face_margin_y1 = 20 + 5 #resize_rate + shift_rate
                face_margin_x2 = -20 - 10 #resize_rate + shift_rate
                face_margin_y2 = -20 + 5 #resize_rate + shift_rate
                face_margin_h = -0.7 #resize_factor
                face_margin_w = -0.7 #resize_factor
            #Adjusting the frame for profile right
            elif(my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            crop_img = frame[face_y1:face_y2, face_x1:face_x2]
            cv2.imshow("cropped", crop_img)

            roll = my_head_pose_estimator.return_roll(crop_img)
            pitch = my_head_pose_estimator.return_pitch(crop_img)
            yaw = my_head_pose_estimator.return_yaw(crop_img)
            #print("Estimated [roll, pitch, yaw] ..... [" + str(roll[0,0,0]) + "," + str(pitch[0,0,0]) + "," + str(yaw[0,0,0])  + "]")

            if yaw[0,0,0] > 30:
                cv2.putText(frame, "You are facing right!", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)
            if yaw[0,0,0] < -30:
                cv2.putText(frame, "You are facing left!", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)

            #Updating the ROI position
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if(roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if(roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if(roi_h > cam_h): roi_h = cam_h
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if(roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if(DEBUG == True):
                #print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h)
                #print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)

                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                cv2.rectangle(frame,
                             (face_x1, face_y1),
                             (face_x2, face_y2),
                             (0, 255, 0),
                              2)

            #In case of a frontal/rotated face the
            # landmark detector is called
            if(my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)

                if(DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D,
                                                  landmarks_2D,
                                                  camera_matrix, camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50,0,0],
                                      [0,50,0],
                                      [0,0,50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        if(DEBUG == True):
            text_x1 = roi_x1
            text_y1 = roi_y1 - 3
            if(text_y1 < 0): text_y1 = 0
            cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
            cv2.rectangle(frame,
                         (roi_x1, roi_y1),
                         (roi_x2, roi_y2),
                         (0, 255, 255),
                         2)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")