import cv2 as cv

# Assumed local modules; adjust the import paths to your project layout.
from pose_estimator import PoseEstimator
from frame_operations import FrameOperations


class VideoImgManager:
    def __init__(self):
        self.POSE_ESTIMATOR = PoseEstimator()
        self.FRAME_OPS = FrameOperations()

        self.FIRST = True

    def estimate_vid(self, webcam_id=0):
        """reads webcam, applies pose estimation on webcam"""
        cap = cv.VideoCapture(webcam_id)

        while True:
            has_frame, frame = cap.read()
            if not has_frame:
                break

            if self.FIRST:
                self.WEB_CAM_H, self.WEB_CAM_W = frame.shape[0:2]
                self.FIRST = False

            frame = self.POSE_ESTIMATOR.get_pose_key_angles(frame)

            cv.imshow('frame', frame)
            if cv.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv.destroyAllWindows()

    def estimate_img(self, img_path):
        """applies pose estimation on img"""

        img = cv.imread(img_path)

        img = self.POSE_ESTIMATOR.get_pose_key_angles(img)

        cv.imshow("Image Pose Estimation", img)

        cv.waitKey(0)
        cv.destroyAllWindows()
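
A minimal usage sketch for this class (the webcam index and image path are hypothetical):

# Hypothetical usage; the sample path and webcam index are assumptions.
manager = VideoImgManager()
manager.estimate_img("samples/person.jpg")   # single image
manager.estimate_vid(webcam_id=0)            # live webcam; press 'q' to quit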
Example #2
    def __init__(self):
        print "OpenCV version: {}".format(cv2.__version__)
        self.CNN_INPUT_SIZE = 128

        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))

        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(
                                       self.mark_detector,
                                       self.img_queue,
                                       self.box_queue,
                                   ))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool,
            self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name",
                                            "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image,
                                               self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param(
            "show_annotation_box", True)
        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)

        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)

        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()
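
Several of these examples hand a get_face function to Process without showing it. A minimal sketch of that worker, assuming the MarkDetector API seen in Example #10 (extract_cnn_facebox):

def get_face(detector, img_queue, box_queue):
    """Read frames from img_queue, detect a facebox, push it to box_queue."""
    while True:
        image = img_queue.get()
        box = detector.extract_cnn_facebox(image)
        box_queue.put(box)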
Example #3
	def __init__(self):
		# Load the parameters
		self.conf = config()
		# initialize dlib's face detector (HOG-based) and then create the
		# facial landmark predictor
		print("[INFO] loading facial landmark predictor...")
		self.detector = dlib.get_frontal_face_detector()
		self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path)
		
		# grab the indexes of the facial landmarks for the left and
		# right eye, respectively
		(self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
		(self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
		
		# initialize the video stream and sleep for a bit, allowing the
		# camera sensor to warm up
		self.cap = cv2.VideoCapture(self.conf.vedio_path)
		if self.conf.vedio_path == 0:
			self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
		_, sample_frame = self.cap.read()
		
		# Introduce mark_detector to detect landmarks.
		self.mark_detector = MarkDetector()
		
		# Setup process and queues for multiprocessing.
		self.img_queue = Queue()
		self.box_queue = Queue()
		self.img_queue.put(sample_frame)
		self.box_process = Process(target=get_face, args=(
			self.mark_detector, self.img_queue, self.box_queue,))
		self.box_process.start()
		
		# Introduce pose estimator to solve pose. Get one frame to setup the
		# estimator according to the image size.
		self.height, self.width = sample_frame.shape[:2]
		self.pose_estimator = PoseEstimator(img_size=(self.height, self.width))
		
		# Introduce scalar stabilizers for pose.
		self.pose_stabilizers = [Stabilizer(
			state_num=2,
			measure_num=1,
			cov_process=0.1,
			cov_measure=0.1) for _ in range(6)]
		
		self.tm = cv2.TickMeter()
		# Gaze tracking
		self.gaze = GazeTracking()
Example #4
    def init(self):
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
                state_num=2,
                measure_num=1,
                cov_process=0.1,
                cov_measure=0.1) for _ in range(6)]

        # The image size is fixed here; alternatively, wait for the first
        # color frame and derive (height, width) from self.color_img.shape.
        self.pose_estimator = PoseEstimator(img_size=(480, 640))
Example #5
    def get_source_shoulder_details(self, source_img, cloth_seg):
        "get source shoulder distance and rotation angle"

        try:
            # initialize source pose estimator
            self.source_pose_estimator = PoseEstimator(source_img)
            source_distance = self.source_pose_estimator.get_shoulder_details()
            source_points = self.source_pose_estimator.get_shoulder_points()
        except Exception as e:
            print(str(e))
            # if no source shoulder points detected
            self.error_list.append("Issue in source image:" + str(e))
            logging.warning(
                "Using manual shoulder detection for source image.")
            source_points, source_distance = custom_shoulder_locator.get_shoulder_details_mannual(
                cloth_seg)

        return source_points, source_distance
Example #6
    def run(self):
        estimator = PoseEstimator("./usb_camera.yml")

        # 2. Load images from the camera
        capture = cv2.VideoCapture(2)
        if not capture.isOpened():
            print("摄像头打开失败!")
            return

        capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 960)

        try:
            while not rospy.is_shutdown():
                _, frame = capture.read()
                if self.robot.isMoving():
                    continue
                rst, image, pose = estimator.estimate(frame)
                cv2.imshow("img", image)

                if rst:
                    print(("xyz-rpy: " + "{:>8.3f}," * 6).format(
                        pose[0], pose[1], pose[2], pose[3], pose[4], pose[5]))

                    self.robot.move_rotation(radians(-pose[4]))

                else:
                    print("未发现目标")
                    #self.robot.move_rotation(radians(90))
                    self.robot.stop()

                action = cv2.waitKey(30) & 0xff
                if action == ord('s'):
                    cv2.imwrite("save_line.jpg", image)
                elif action == 27:
                    break
                self.rate.sleep()
        except KeyboardInterrupt:
            print("Program stopped manually with Ctrl+C")
        finally:
            capture.release()
            cv2.destroyAllWindows()
Example #7
    def get_frame3(self, mark_detector, pose_stabilizers, flag1):
        global height, width
        success, image = self.video.read()
        if flag1:
            height, width = image.shape[:2]
            # Alternatively: self.video.get(cv2.CAP_PROP_FRAME_HEIGHT / _WIDTH)
            flag1 = False
        pose_estimator = PoseEstimator(img_size=(height, width))
        img4 = pose(image, mark_detector, pose_estimator, pose_stabilizers)
        ret, jpeg = cv2.imencode('.jpg', img4)
        return jpeg.tobytes(), flag1
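
get_frame3 returns encoded JPEG bytes, which suggests an MJPEG streaming setup. A hedged sketch of how such frames are typically served (this generator is an assumption, not part of the original):

def gen(camera, mark_detector, pose_stabilizers):
    # Yield a multipart MJPEG stream; flag1 triggers size detection on the first frame.
    flag1 = True
    while True:
        frame_bytes, flag1 = camera.get_frame3(mark_detector, pose_stabilizers, flag1)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame_bytes + b'\r\n')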
Example #8
def init():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    return cap, video_src, img_queue, box_queue, tm, mark_detector, pose_estimator
Example #9

def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    if args.out is not None:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    tm = cv2.TickMeter()

    cnt = 0

    input_path = args.input_path
    listdir = os.listdir(input_path)
    for v_name in listdir:
        v_path = os.path.join(input_path, v_name)
        cap = cv2.VideoCapture(v_path)

        while True:
            # Read frame, crop it, flip it, suits your needs.
            frame_got, frame = cap.read()
            if frame_got is False:
                break

            # Crop it if frame is larger than expected.
            # frame = frame[0:480, 300:940]

            # If frame comes from webcam, flip it so it looks like a mirror.
            if video_src == 0:
                frame = cv2.flip(frame, 2)

            # Pose estimation by 3 steps:
            # 1. detect face;
            # 2. detect landmarks;
            # 3. estimate pose

            # Feed frame to image queue.
            img_queue.put(frame)

            # Get face from box queue.
            facebox = box_queue.get()

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                           facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks(face_img)
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(255, 128, 128))

                # Draw the stabilized pose annotation on frame.
                pose_estimator.draw_annotation_box(
                    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Show preview.
            # cv2.imshow("Preview", frame)
            # if cv2.waitKey(10) == 27:
            #     break
            if args.out is not None:
                output_movie.write(frame)
            else:
                cv2.imshow("Preview", frame)

            cnt += 1
            if cnt % 100 == 0:
                print(str(cnt), flush=True)

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
Example #10
def Camera_Capture():
    mark_detector = MarkDetector()
    name = input("Enter student id:")
    directory = os.path.join(facepath, name)
    
    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok=True)

    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    poses = ['frontal', 'right', 'left', 'up', 'down']
    file = 0
    cap = cv2.VideoCapture(file)
    
    ret, sample_frame = cap.read()
    i = 0
    count = 0
    if not ret:
        return
        
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    
    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]
    images_saved_per_pose = 0
    number_of_images = 0
    
    shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH)
    while i < 5:
        saveit = False
        # Read frame, crop it, flip it, suits your needs.
        ret, frame = cap.read()
        if ret is False:
            break
        count += 1
        if count % 5 != 0:  # process every 5th frame only
            continue
        if images_saved_per_pose == IMAGE_PER_POSE:
            i += 1
            images_saved_per_pose = 0

        # If frame comes from webcam, flip it so it looks like a mirror.
        if file == 0:
            frame = cv2.flip(frame, 2)
        original_frame = frame.copy()
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        facebox = mark_detector.extract_cnn_facebox(frame)
    
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            # Clamp the facebox to the frame (margin currently 0 px).
            x1 = max(facebox[0], 0)
            x2 = min(facebox[2], width)
            y1 = max(facebox[1], 0)
            y2 = min(facebox[3], height)

            face = frame[y1:y2, x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
    
            marks = mark_detector.detect_marks([face_img])
    
            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]
        
            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
    
            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))
    
            # print(steady_pose[0][0])
            # if steady_pose[0][0]>0.1:
            #     print('right')
            # else: 
            #     if steady_pose[0][0]<-0.1:
            #         print('left')
            # if steady_pose[0][1]>0.1:
            #     print('down')
            # else: 
            #     if steady_pose[0][1]<-0.1:
            #         print('up')
            # print(steady_pose[0])
            if i == 0:
                if abs(steady_pose[0][0]) < ANGLE_THRESHOLD and abs(steady_pose[0][1]) < ANGLE_THRESHOLD:
                    images_saved_per_pose += 1
                    saveit = True
            elif i == 1:
                if steady_pose[0][0] > ANGLE_THRESHOLD:
                    images_saved_per_pose += 1
                    saveit = True
            elif i == 2:
                if steady_pose[0][0] < -ANGLE_THRESHOLD:
                    images_saved_per_pose += 1
                    saveit = True
            elif i == 3:
                if steady_pose[0][1] < -ANGLE_THRESHOLD:
                    images_saved_per_pose += 1
                    saveit = True
            elif i == 4:
                if steady_pose[0][1] > ANGLE_THRESHOLD:
                    images_saved_per_pose += 1
                    saveit = True

            if i >= 5:
                print('Thank you')
                break

            # Show preview.
            frame = cv2.putText(
                frame,
                '{} : {}/{}'.format(poses[i], images_saved_per_pose, IMAGE_PER_POSE),
                (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 1,
                cv2.LINE_AA)
            frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2)
            cv2.imshow("Preview", frame)

            if saveit:
                face = dlib.rectangle(x1, y1, x2, y2)
                face_aligned = face_aligner.align(original_frame, frame_gray, face)
                cv2.imwrite(
                    os.path.join(directory,
                                 str(name) + '_' + str(number_of_images) + '.jpg'),
                    face_aligned)
                print(images_saved_per_pose)
                number_of_images += 1

        if cv2.waitKey(10) == 27:
            break    
    cap.release()
    cv2.destroyAllWindows()
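
Camera_Capture relies on several module-level names it never defines. One plausible set of definitions, stated purely as assumptions to make the snippet self-contained:

import os
import errno

import cv2
import dlib
import numpy as np

facepath = "./faces"    # assumed output root for captured face images
IMAGE_PER_POSE = 10     # assumed number of images to save per head pose
ANGLE_THRESHOLD = 0.3   # assumed yaw/pitch threshold on the pose vector
FACE_WIDTH = 160        # assumed aligned-face width in pixels
CNN_INPUT_SIZE = 128    # landmark CNN input size used throughout these examples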
Example #11

def headPoseEstimation():
    """MAIN"""
    print("HEAD POSE ESTIMATION...")
    # Video source from webcam or video file.
    #video_src = 0
    #video_src = 'EWSN.avi'
    #cam = cv2.VideoCapture(video_src)
    #_, sample_frame = cam.read()

    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    print("1")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    print("2")
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    print("3")
    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    # start = time.time()
    # previousTime1 = start
    # previousTime2 = start
    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    moveClass = ''  # initialized so the final return cannot raise NameError
    headPoseDirection = 'empty'

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break
        #print("4")
        #print(frame.shape)
        #print("5")
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        #if video_src == 0:
        #    print("6")
        #    frame = cv2.flip(frame, 2)
        #    cv2.imwrite("Preview.png", frame)
        #    print("7")

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)
        #print("8")
        # Get face from box queue.
        facebox = box_queue.get()
        #print(type(facebox)
        #print(facebox
        #print("9")
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            print(face_img.shape)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # # Get average position of nose
            noseMarksTemp = []
            noseMarksTemp.append(marks[30][0])
            noseMarksTemp.append(marks[30][1])
            noseMarks[0] = noseMarksTemp

            for i in range(9, 0, -1):
                noseMarks[i] = noseMarks[i - 1]

            # Get the direction of head movement
            headPoseDirection = calculateDirection(noseMarks)
            #if headPoseDirection != 'still':
            directionArray.append(headPoseDirection)
            #print(directionArray
            #print(len(directionArray)
            print("To capture a movement press 'a' and perform a movement.")
            #currentTime1 = time.time()
            key = cv2.waitKey(5)
            if key == ord('a') and not classifyMoves:
                classifyMoves = 1
                print("Start classifying movement...")
                timer = time.time()
                currentTime = timer
                previousTime1 = timer
                previousTime2 = timer

            if key == ord('b') and classifyMoves:
                classifyMoves = 0
                print("Stopped classifying movement...")

            currentTime = time.time()
            if currentTime - previousTime1 > 2 and classifyMoves:
                print("------------------------------------------------")
                print("Elapsed timer 1: " + str(currentTime - previousTime1))
                #print(len(directionArray)

                # Get most common direction
                moveClass = mostCommon(directionArray)
                #moveSequence.append(moveClass)
                print(moveClass)

                # Get a sequence of head movements
                # if currentTime - previousTime2 > 10 and classifyMoves == 1 and len(moves) == 0:
                #     print("Elapsed timer 2: " + str(currentTime - previousTime2)
                #     numMoves = len(moveSequence)
                #     moves = moveSequence[(numMoves-5):(numMoves-1)]
                #     print(moves
                #     moveSequence = []
                #     previousTime2 = currentTime
                #     classifyMoves = 0
                classifyMoves = 0
                directionArray = []
                previousTime1 = currentTime

                print(
                    "To continue type 'c' or to recapture a movement type 'a'."
                )

            if key == ord('c'):
                break

            #print(previousTime
            # Uncomment following line to show raw marks.
            #mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following lines to draw the stabilized pose annotation on frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # if len(moves) > 1:
        #     cv2.putText(frame, moves[0], (450,70), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[1], (450,100), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[2], (450,130), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[3], (450,160), font, 1, (0,0,0), 4)

        cv2.putText(frame, headPoseDirection, (20, 70), font, 1, (0, 255, 0),
                    4)

        # Show preview.
        #cv2.namedWindow("", cv2.WINDOW_NORMAL)
        cv2.imshow("Preview", frame)
        #cv2.resizeWindow("preview", 5000,5000)
        if cv2.waitKey(5) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    return moveClass
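
headPoseEstimation leans on two helpers, mostCommon and calculateDirection, that are not shown. Hedged sketches of both (the pixel threshold is an assumption):

def mostCommon(items):
    # Return the most frequent element of a list.
    return max(set(items), key=items.count)


def calculateDirection(noseMarks, threshold=3.0):
    # Compare the newest and oldest buffered nose positions and name the
    # dominant movement; 'threshold' px is an assumed dead zone.
    dx = noseMarks[0][0] - noseMarks[-1][0]
    dy = noseMarks[0][1] - noseMarks[-1][1]
    if abs(dx) < threshold and abs(dy) < threshold:
        return 'still'
    if abs(dx) >= abs(dy):
        return 'right' if dx > 0 else 'left'
    return 'down' if dy > 0 else 'up'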
Example #12
def main():
    #bagreader = BagFileReader(args.video, 640,480,848,480,30,30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)
    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps,
                          (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:

        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        if frame is False:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)

        mess = "Not detect pose"

        if faceboxes is not None:
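            # A single facebox arrives as a flat [x1, y1, x2, y2] list;
            # wrap it so the loop below always iterates over boxes.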
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(
                #     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw the raw pose annotation on frame.
                pose_estimator.draw_annotation_box(frame,
                                                   pose[0],
                                                   pose[1],
                                                   color=(255, 128, 128))

                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)

                # Uncomment following lines to draw the stabilized pose
                # annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        cv2.putText(frame,
                    "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0),
                    thickness=2)
        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #13

def main():
    instructions = 0
    print("--- Intention classification for communication between a human and a robot ---")
    if instructions == 1:
        print("First you will be required to present a facial expression before you will do a head movement.")
        print("If done correctly these gestures will be detected by the robot and it will perform the desired task.")
        input("Press Enter to continue...")

    if instructions == 1:
        print("This is the module for facial expression recognition.")
        print("This program can detect the emotions: Happy and Angry.")
        print("The program will look for the expression for 3 seconds.")
    input("To proceed to Facial Expression Recognition press Enter...")
    predictExp = 0

    # Load Facial Expression Recognition trained model
    print "- Loading FER model... -"
    #faceExpModel = tf.keras.models.load_model("/home/bjornar/ML_models/FER/Good models(80+)/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5")
    faceExpModel = tf.keras.models.load_model(
        "/home/project/Bjorn/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5"
    )

    # Load Face Cascade for Face Detection
    print "- Loading Face Cascade for Face Detection... -"
    #cascPath = "/home/bjornar/MScDissertation/TrainingData/FaceDetection/haarcascade_frontalface_default.xml"
    cascPath = "/home/project/Bjorn/IntentionClassification-Repository/haarcascade_frontalface_default.xml"
    faceCascade = cv2.CascadeClassifier(cascPath)

    ## Initializing Head Movement variables
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    sample_frame = cv2.imread("sample_frame.png")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]

    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    headPoseDirection = 'empty'

    if camera == 'kinect':
        ## Initialize Kinect camera
        print "Initializing camera..."
        try:
            from pylibfreenect2 import OpenGLPacketPipeline
            pipeline = OpenGLPacketPipeline()
        except ImportError:
            try:
                from pylibfreenect2 import OpenCLPacketPipeline
                pipeline = OpenCLPacketPipeline()
            except ImportError:
                from pylibfreenect2 import CpuPacketPipeline
                pipeline = CpuPacketPipeline()
        #print("Packet pipeline:", type(pipeline).__name__)

        # Create and set logger
        #logger = createConsoleLogger(LoggerLevel.Debug)
        setGlobalLogger()

        fn = Freenect2()
        num_devices = fn.enumerateDevices()
        if num_devices == 0:
            print("- No device connected! -")
            sys.exit(1)

        serial = fn.getDeviceSerialNumber(0)
        device = fn.openDevice(serial, pipeline=pipeline)

        listener = SyncMultiFrameListener(FrameType.Color | FrameType.Ir
                                          | FrameType.Depth)

        # Register listeners
        device.setColorFrameListener(listener)
        device.setIrAndDepthFrameListener(listener)

        device.start()

        # NOTE: must be called after device.start()
        registration = Registration(device.getIrCameraParams(),
                                    device.getColorCameraParams())

    elif camera == 'webcam':
        #video_capture = cv2.VideoCapture(0)
        video_capture = cv2.VideoCapture(-1)

    elif camera == 'video':
        #video_capture = cv2.VideoCapture(0)
        video_capture = cv2.VideoCapture(
            "/home/project/Bjorn/SonyHandycamTest.mp4")

    ## Facial Expression Recognition variables
    FER_prediction = []
    FERclass = ''
    FERstart = 0
    classifyMoves = 0

    ## Head movement variables
    predictHeadMov = 3
    HeadMov = []
    HMCclass = ''
    detectedFaces = []
    mark = []

    notDone = 0
    progressStatus = [0, 0]

    while notDone in progressStatus:  # This waits for each module to finish
        if camera == 'kinect':
            frames = listener.waitForNewFrame()

            color = frames["color"]
            color = color.asarray()
            color = cv2.resize(color, (int(873), int(491)))
            color = color[0:480, 150:790]
            color = np.delete(color, np.s_[3::], 2)

        elif camera == 'webcam':
            # Capture frame-by-frame
            ret, color = video_capture.read()

        elif camera == 'video':
            # Capture frame-by-frame
            ret, color = video_capture.read()

        ## Detect facial expression
        predictExpNums = [1, 2]
        if predictExp == 0:
            while predictExp not in predictExpNums:
                predictExp = int(
                    input(
                        "\nPress 1 to detect Facial Expression or press 2 to do Head Movement classification."
                    ))
            if predictExp == 1:
                predictExp = 1
                print "------ Facial Expression Recognition ------"
            elif predictExp == 2:
                predictHeadMov = 0
                progressStatus[0] = 1

            FERstart = time.time()

        elif predictExp == 1:
            currentTime = time.time()
            if currentTime - FERstart < 10:
                FER_prediction.append(
                    facialExpressionRecogntion(color, faceExpModel,
                                               faceCascade))
            else:
                predictExp = 2
                FER_prediction = list(filter(None, FER_prediction))
                FERclass = mostCommon(FER_prediction)
                FERclass = FERclass[2:7]
                predictHeadMov = 0
                if FERclass == '':
                    print("Did not detect an expression, try again.")
                    predictExp = 0
                else:
                    progressStatus[0] = 1
                    print "Detected expression: " + str(FERclass)
                    print "Progress: FER DONE"
                # else:
                #     #cv2.imwrite("sample_frame.png", color)
                #     break

        ## Detect head movement class
        if predictHeadMov == 0:
            predictHeadMov = int(
                input(
                    "\nPress 1 to do Head Movement classification or 2 to skip."
                ))
            if predictHeadMov == 1:
                predictHeadMov = 1
                print("------ Head Movement Classification ------")
                moveTime = int(
                    input(
                        "How many seconds should I record your movement?"))
                #moveTime = 2
            else:
                predictHeadMov = 2
            timer = time.time()
            previousTime1 = timer
            previousTime2 = timer

        if predictHeadMov == 1:
            print(color.shape)
            color = color[0:480, 0:480]
            color = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
            cv2.imshow("", color)
            input()
            print(color.shape)
            # Feed frame to image queue.
            img_queue.put(color)

            #pdb.set_trace()

            # Get face from box queue.
            facebox = box_queue.get()
            print(color.shape)

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = color[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = mark_detector.detect_marks(face_img)

                #Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(color, headPoseDirection, (20, 70), font, 1,
                            (0, 255, 0), 4)

                # # Get average position of nose
                noseMarksTemp = []
                noseMarksTemp.append(marks[30][0])
                noseMarksTemp.append(marks[30][1])
                noseMarks[0] = noseMarksTemp

                for i in range(9, 0, -1):
                    noseMarks[i] = noseMarks[i - 1]

                # Get the direction of head movement
                headPoseDirection = calculateDirection(noseMarks)

                directionArray.append(headPoseDirection)

                if classifyMoves == 0:
                    classifyMoves = 1
                    timer = time.time()
                    previousTime1 = timer
                    previousTime2 = timer

                currentTime = time.time()
                if currentTime - previousTime1 > moveTime and classifyMoves == 1:
                    print("------------------------------------------------")
                    print("Elapsed timer 1: " + str(currentTime - previousTime1))

                    # Get most common direction
                    HMCclass = mostCommon(directionArray)

                    classifyMoves = 2
                    directionArray = []
                    previousTime1 = currentTime

                    progressStatus[1] = 1
                    print "Progress: HMC DONE"
            else:
                print "Did not detect a face"
        elif predictHeadMov == 2:
            progressStatus[1] = 1
            print "Skipped Head Movement Estimation."
            break

        # if notDone in progressStatus and predictHeadMov == 2 and predictExp == 2:
        #     print "You seem to have skipped one or more tasks."
        #     inpt = ''
        #     while inpt == '':
        #         inpt = raw_input("To do FER press 1 and to do HMC press 2...")
        #         if input == '1':
        #             predictExp = 1
        #         elif input == '2':
        #             predictHeadMov

        cv2.imshow("", color)
        if camera == 'kinect':
            listener.release(frames)

        key = cv2.waitKey(delay=1)
        if key == ord('q'):
            break

    if camera == 'kinect':
        listener.release(frames)
        device.stop()
        device.close()
    elif camera == 'webcam' or camera == 'video':
        video_capture.release()

    cv2.destroyAllWindows()

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    print "---------------- RESULT ----------------"

    if FERclass != '':
        print("Detected facial expression: " + str(FERclass))
    else:
        print("Did not detect any expression.")

    if HMCclass != '':
        print("Detected head movement: " + str(HMCclass))
    else:
        print("Did not detect a head movement.")

    print("----------------------------------------")

    intentionClassification = [FERclass, HMCclass]

    return intentionClassification
Example #14
    def __init__(self, source_img, dest_img):
        self.source_img = source_img
        self.dest_img = dest_img
        self.dest_pose_estimator = PoseEstimator(cv.imread(self.dest_img))
        self.source_pose_estimator = None  # initialize after cloth extraction
        self.error_list = []
Example #15
import os
import sys

import cv2 as cv

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from pose_estimator import PoseEstimator

# test run
SAMPLES_DIR = os.environ['ROOT_DIR'] + "/src/flask_app/static/images/samples"
sample_file = SAMPLES_DIR + "/profile_images/sample-profile-image1.jpg"

img = cv.imread(sample_file)
PoseEstimator(img).visualize_pose()
Example #16
def main():
    print("Starting processing...")
    start_time = time.time()

    cap = cv2.VideoCapture(video_file)
    assert cap.isOpened(), 'Failed to open video file'

    s, first_frame = cap.read()
    if not s:
        print("Failed to read dimensions from video file!")
        return

    dimensions = tuple(first_frame.shape[i] * VIDEO_RESCALE_FACTOR
                       for i in range(2))

    pe = PoseEstimator(dimensions)

    with open(output_filename, "w+") as output_file:
        fieldnames = ["timestamp"] + [f"rot_{d}" for d in "xyz"] + [
            f"pos_{d}" for d in "xyz"
        ] + [f"landmark_{i}_{d}" for i in range(68) for d in "xy"]
        mvid_writer = csv.DictWriter(output_file, fieldnames=fieldnames)

        mvid_writer.writeheader()

        last_row_dict = None
        while cap.isOpened():
            s, frame = cap.read()
            if not s:
                print("No more frames!")
                break

            timestamp = int(cap.get(cv2.CAP_PROP_POS_MSEC))

            success, labelled_frame, landmarks, pose_estimation_pts = detect(
                frame, mark=should_display)

            row_dict = {}
            if success:
                rescaled_frame = rescale_frame(labelled_frame,
                                               VIDEO_RESCALE_FACTOR)

                head_rotation, head_translation = pe.estimate_pose(
                    pose_estimation_pts)

                row_dict["timestamp"] = timestamp
                for i, d in enumerate("xyz"):
                    row_dict[f"rot_{d}"] = head_rotation[i][0]
                    row_dict[f"pos_{d}"] = head_translation[i][0]
                for i, l in enumerate(landmarks):
                    for j, d in enumerate("xy"):
                        row_dict[f"landmark_{i}_{d}"] = l[j]
            else:
                print(f"Couldn't find face at timestamp {timestamp}")
                if last_row_dict is None:
                    continue  # nothing to repeat if the very first frame fails
                row_dict = last_row_dict
                row_dict["timestamp"] = timestamp

            mvid_writer.writerow(row_dict)
            last_row_dict = row_dict

            if should_display:
                cv2.imshow(WINDOW_TITLE, frame)
            if cv2.waitKey(1) & 0xFF == 27:  # 27 is ASCII for the Esc key
                break

        cap.release()
        cv2.destroyAllWindows()

    print(f"Time consumed: {time.time() - start_time}")
    print(f"Video feature data exported to: {output_filename}")
Example #17
    def process(self, image, output_path=None, output_name=None):
        image_origin = image
        check_dir(output_path)

        cv2.imwrite(join(output_path, output_name + '.origin.png'),
                    image_origin)
        # image_name =
        # image_origin = cv2.imread(image_path)
        image_origin_height, image_origin_width = image_origin.shape[0:2]
        # print('Width', image_origin_width, 'Height', image_origin_height)

        image_crop, image_edge = format_image_rgb(image_origin)
        cv2.imwrite(join(output_path, output_name + '.landmark.crop.png'),
                    image_crop)
        # print('Image Data', image_crop, 'Image Edge', image_edge)

        image_crop_resize = cv2.resize(image_crop, (128, 128))
        cv2.imwrite(join(output_path, output_name + '.landmark.resize.png'),
                    image_crop_resize)

        # image_data = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
        # print('Image', image_crop_resize)

        predictions = self.sess.run(self.landmark_logits,
                                    feed_dict={self.inputs: image_crop_resize})
        # print(predictions)
        # print('Len predictions', predictions)

        marks = np.array(predictions).flatten()
        marks = np.reshape(marks, (-1, 2))
        # print(marks)

        # Map the marks from the cropped square back to the original image.
        marks *= (image_edge[2] - image_edge[0])
        marks[:, 0] += image_edge[0]
        marks[:, 1] += image_edge[1]
        # print(marks)

        with open(join(output_path, output_name + '.marks.txt'),
                  'w',
                  encoding='utf-8') as f:
            f.write(json.dumps(marks.tolist()))

        for mark in marks:
            # cv2.circle needs integer pixel coordinates.
            cv2.circle(image_origin, (int(mark[0]), int(mark[1])), 3, (255, 0, 0))

        cv2.imwrite(join(output_path, output_name + '.landmark.png'),
                    image_origin)

        pose_estimator = PoseEstimator(img_size=(image_origin_height,
                                                 image_origin_width))
        # pose_estimator
        pose = pose_estimator.solve_pose_by_68_points(marks)
        print('Pose', pose)

        with open(join(output_path, output_name + '.pose.txt'),
                  'w',
                  encoding='utf-8') as f:
            f.write(json.dumps(np.array(pose).tolist()))  # pose holds numpy arrays

        return pose
Example #18
from __future__ import print_function
import sys

sys.path.append("../Math")
import math
from pose_estimator import PoseEstimator
from rigidtransform import RigidTransform
from rigidtransform import Twist
from translation import Translation
from rotation import Rotation

import matplotlib.pyplot as plt

estimator = PoseEstimator()

# Random values: 12 ft/s, 1 rad/s.
twist = Twist(12.0, 1)
expected_end_pose = RigidTransform.exp(twist)
print("Expected Ending Pose: ")
expected_end_pose.print()

plt.ion()
axes = plt.gca()
axes.set_xlim([-20, 20])
axes.set_ylim([-20, 20])


def plot():
    iterations = 100
    l_enc = 0
Example #19
from pose_estimator import PoseEstimator
import cv2

model = "posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite"
image = cv2.imread("test.jpg")
pose_estimator = PoseEstimator(model)
coords, scores = pose_estimator(cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB))
'''
0 	nose
1 	leftEye
2 	rightEye
3 	leftEar
4 	rightEar
5 	leftShoulder
6 	rightShoulder
7 	leftElbow
8 	rightElbow
9 	leftWrist
10 	rightWrist
11 	leftHip
12 	rightHip
13 	leftKnee
14 	rightKnee
15 	leftAnkle
16 	rightAnkle
'''
for coord in coords:
    p = (int(coord[1]), int(coord[0]))
    image = cv2.circle(image, p, radius=0, color=(0, 255, 0), thickness=10)
cv2.imwrite("ntest.jpg", image)
Example #20
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Show raw marks.
            mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
            right_corner = tuple([int(i) for i in marks[36]])
            left_corner = tuple([int(i) for i in marks[45]])
            # print(marks[36], marks[45])
            cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2)

            pixel_distance = int(
                math.sqrt((right_corner[0] - left_corner[0])**2 +
                          (right_corner[1] - left_corner[1])**2))
            estimated_distance = (real_width * focal_length) / pixel_distance

            cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

            # Draw head axes on frame.
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            angles = process_eyes(frame, marks)
            if bool(angles) is True:
                # print(angles)
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
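
The distance readout above uses the pinhole model, estimated_distance = real_width * focal_length / pixel_distance, so real_width (the true outer eye-corner span) and focal_length must already be known. A hedged one-time calibration sketch; the constants are assumptions, not values from the original:

KNOWN_DISTANCE_CM = 50.0   # assumed: face held 50 cm from the camera
REAL_WIDTH_CM = 9.5        # assumed: outer eye-corner distance in cm

def calibrate_focal_length(pixel_distance_at_known):
    # Rearranged pinhole model: focal = (pixels * distance) / real_width
    return (pixel_distance_at_known * KNOWN_DISTANCE_CM) / REAL_WIDTH_CM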

def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            # Uncomment following line to draw the raw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stable pose annotation on frame.
            pose_estimator.draw_annotation_box(
                frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#22
0
def main():
    """MAIN"""
    cv2.namedWindow("Test")  # Create a named window
    cv2.moveWindow("Test", 900, 600)  # Move it to (900, 600)

    screenWidth, screenHeight = pyautogui.size()
    st = 'Last command'

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Setting up process for listening to audio commands
    voice_command_queue = Q()
    stt_process = Thread(target=get_voice_command,
                         args=(voice_command_queue, ))
    stt_process.daemon = True
    stt_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stable pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(255, 128, 128))

            # Compute the head-axis endpoints that drive the mouse cursor.
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])

            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]

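            # Map the head-axis offset to screen coordinates: deltax is assumed
            # to span roughly [-44, 44] px and deltay roughly [-14, 44] px, so
            # the affine below stretches those ranges onto [0, screenWidth] and
            # [0, screenHeight]. The constants look empirically tuned.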
            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)

            # print(xpos, ypos)

            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():

                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'

                print(command)

        cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)
        scale_percent = 30
        # shrink the preview to scale_percent of the original dimensions
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)

        # dsize
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)

        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#23
0
def main():

    ''' main function'''

    img_dir = "img_dir/"     # image file dir

    # setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_list = []
    for filename in os.listdir(img_dir):
        frame = cv2.imread(img_dir + filename)
        img_list.append(frame)
        img_queue.put(frame)
    box_process = Process(target=get_face, args=(img_queue, box_queue))
    box_process.start()

    # introduce mark_detector to detect landmarks
    mark_detector = MarkDetector()

    while True:
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # flipcode > 0: horizontal flip; flipcode = 0: vertical flip; flipcode < 0: horizontal and vertical flip
        # frame = cv2.flip(frame, 2)

        if len(img_list) != 0:
            frame = img_list.pop(0)
            raw_frame = frame.copy()
            # introduce pose estimator to solve pose; set it up
            # according to each image's size.
            height, width = frame.shape[:2]
            pose_estimator = PoseEstimator(img_size=(height, width))
        else:
            break

        # Pose estimation by 3 steps.
        # 1. detect faces.
        # 2. detect landmarks.
        # 3. estimate pose.

        # get face from box queue.
        faceboxes = box_queue.get()
        # print("the length of facebox is " + str(len(faceboxes)))
        for facebox in faceboxes:
            # detect landmarks from image 128 * 128
            face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]    # crop the face area.

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)    # BGR -> RGB
            marks = mark_detector.detect_marks(face_img)

            # convert the marks locations from local CNN to global image.
            marks[:, 0] *= (facebox[2] - facebox[0])  # the width of the rectangle.
            marks[:, 1] *= (facebox[3] - facebox[1])  # the height of the rectangle.
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # uncomment following line to show raw marks.
            # util.draw_marks(frame, marks, color=(0, 255, 0))
            # util.draw_faceboxes(frame, facebox)

            # try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # pose[0] is the rotation_vector and pose[1] is the translation_vector

            pose_np = np.array(pose).flatten()
            pose = np.reshape(pose_np, (-1, 3))

            angle = util.get_angle(pose[0])
            pose_estimator.get_warp_affined_image(frame, facebox, marks, angle[2])
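            # util.get_angle presumably converts the rotation vector to Euler
            # angles; angle[2] (roll) then drives a warp-affine that rotates
            # the face upright.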

        # show preview
        cv2.imshow("Preview", frame)
        cv2.imshow("raw_frame", raw_frame)
        if cv2.waitKey(0) == 27:
            continue

    # clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#24
0
class face_detector():
	def __init__(self):
		# Load the parameters
		self.conf = config()
		# initialize dlib's face detector (HOG-based) and then create the
		# facial landmark predictor
		print("[INFO] loading facial landmark predictor...")
		self.detector = dlib.get_frontal_face_detector()
		self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path)
		
		# grab the indexes of the facial landmarks for the left and
		# right eye, respectively
		(self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
		(self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
		
		# initialize the video stream and sleep for a bit, allowing the
		# camera sensor to warm up
		self.cap = cv2.VideoCapture(0)
		if self.conf.vedio_path == 0:
			self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
		_, sample_frame = self.cap.read()
		
		# Introduce mark_detector to detect landmarks.
		self.mark_detector = MarkDetector()
		
		# Setup process and queues for multiprocessing.
		self.img_queue = Queue()
		self.box_queue = Queue()
		self.img_queue.put(sample_frame)
		self.box_process = Process(target=get_face, args=(
			self.mark_detector, self.img_queue, self.box_queue,))
		self.box_process.start()
		
		# Introduce pose estimator to solve pose. Get one frame to setup the
		# estimator according to the image size.
		self.height, self.width = sample_frame.shape[:2]
		self.pose_estimator = PoseEstimator(img_size=(self.height, self.width))
		
		# Introduce scalar stabilizers for pose.
		self.pose_stabilizers = [Stabilizer(
			state_num=2,
			measure_num=1,
			cov_process=0.1,
			cov_measure=0.1) for _ in range(6)]
		
		self.tm = cv2.TickMeter()
		# Gaze tracking
		self.gaze = GazeTracking()
	
	def detect(self):
		# loop over the frames from the video stream
		temp_steady_pose = 0
		# initialize the blink frame counter and the total number of blinks
		# once, outside the loop, so blinks can accumulate across frames
		COUNTER = 0
		TOTAL = 0
		while True:
			# grab the frame from the threaded video stream, resize it to
			# have a maximum width of 400 pixels, and convert it to
			# grayscale
			frame_got, frame = self.cap.read()
			
			# Empty frame
			frame_empty = np.zeros(frame.shape)
			
			# frame = imutils.rotate(frame, 90)
			frame = imutils.resize(frame, width=400)
			gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
			
			# detect faces in the grayscale frame
			rects = self.detector(gray, 0)
			
			# loop over the face detections
			for (i, rect) in enumerate(rects):
				# determine the facial landmarks for the face region, then
				# convert the facial landmark (x, y)-coordinates to a NumPy
				# array
				self.shape = self.predictor(gray, rect)
				self.shape = face_utils.shape_to_np(self.shape)
				
				# ********************************
				# Blink detection
				# extract the left and right eye coordinates, then use the
				# coordinates to compute the eye aspect ratio for both eyes
				self.leftEye = self.shape[self.lStart:self.lEnd]
				self.rightEye = self.shape[self.rStart:self.rEnd]
				self.leftEAR = eye_aspect_ratio(self.leftEye)
				self.rightEAR = eye_aspect_ratio(self.rightEye)
				
				# average the eye aspect ratio together for both eyes
				ear = (self.leftEAR + self.rightEAR) / 2.0
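				# EAR = (||p2-p6|| + ||p3-p5||) / (2 * ||p1-p4||) per eye
				# (Soukupová & Čech, 2016): the ratio collapses toward zero
				# when the eyelid closes, making it a cheap blink signal.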
				
				# check to see if the eye aspect ratio is below the blink
				# threshold, and if so, increment the blink frame counter
				if ear < self.conf.EYE_AR_THRESH:
					COUNTER += 1
				
				# otherwise, the eye aspect ratio is not below the blink
				# threshold
				else:
					# if the eyes were closed for a sufficient number of
					# then increment the total number of blinks
					if COUNTER >= self.conf.EYE_AR_CONSEC_FRAMES:
						TOTAL += 1
					
					# reset the eye frame counter
					COUNTER = 0
				
				# Frame empty
				cv2.putText(frame_empty, "Blinks: {}".format(TOTAL), (30, 60),
							cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
				cv2.putText(frame_empty, "EAR: {:.2f}".format(ear), (30, 90),
							cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
				
				# ********************************
				# convert dlib's rectangle to a OpenCV-style bounding box
				# [i.e., (x, y, w, h)], then draw the face bounding box
				(x, y, w, h) = face_utils.rect_to_bb(rect)
				self.bounding_box = (x, y, w, h)
				# cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
				
				# Frame empty
				cv2.rectangle(frame_empty, (x, y), (x + w, y + h), (0, 255, 0), 2)
				
				# show the face number
				cv2.putText(frame_empty, "Face #{}".format(i + 1), (30, 120),
							cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
				
				# loop over the (x, y)-coordinates for the facial landmarks
				# and draw them on the image
				for (x, y) in self.shape:
					# cv2.circle(frame, (x, y), 1, (0, 255, 255), -1)
					cv2.circle(frame_empty, (x, y), 1, (0, 255, 255), -1)
			
			# **********************************************************
			if frame_got is False:
				break
			
			# If frame comes from webcam, flip it so it looks like a mirror.
			if self.conf.vedio_path == 0:
				frame = cv2.flip(frame, 2)
			
			# Pose estimation by 3 steps:
			# 1. detect face;
			# 2. detect landmarks;
			# 3. estimate pose
			
			# Feed frame to image queue.
			self.img_queue.put(frame)
			
			# Get face from box queue.
			self.facebox = self.box_queue.get()
			
			if self.facebox is not None:
				# Detect landmarks from image of 128x128.
				face_img = frame[self.facebox[1]: self.facebox[3],
						   self.facebox[0]: self.facebox[2]]
				face_img = cv2.resize(face_img, (self.conf.CNN_INPUT_SIZE, self.conf.CNN_INPUT_SIZE))
				face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
				
				self.tm.start()
				# marks = self.mark_detector.detect_marks([face_img])
				self.tm.stop()
				
				# Note: self.shape comes from dlib and is already in global
				# image coordinates, so the CNN local-to-global conversion
				# (scaling by the facebox size and adding its offset) is not
				# applied here.
				
				# Uncomment following line to show raw marks.
				# mark_detector.draw_marks(
				#     frame, marks, color=(0, 255, 0))
				
				# Uncomment following line to show facebox.
				# mark_detector.draw_box(frame, [facebox])
				
				# Try pose estimation with 68 points.
				self.pose = self.pose_estimator.solve_pose_by_68_points(self.shape)
				
				# Stabilize the pose.
				self.steady_pose = []
				pose_np = np.array(self.pose).flatten()
				for value, ps_stb in zip(pose_np, self.pose_stabilizers):
					ps_stb.update([value])
					self.steady_pose.append(ps_stb.state[0])
				self.steady_pose = np.reshape(self.steady_pose, (-1, 3))
				
				# Uncomment following line to draw pose annotation on frame.
				# pose_estimator.draw_annotation_box(
				# 	frame, pose[0], pose[1], color=(255, 128, 128))
				
				# Uncomment following line to draw stable pose annotation on frame.
				# pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
				
				# Uncomment following line to draw head axes on frame.
				# pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
				self.pose_estimator.draw_axes(frame_empty, self.steady_pose[0], self.steady_pose[1])
				print('steady pose vector: {} {}'.format(self.steady_pose[0], self.steady_pose[1]))
			else:
				# cv2.putText(frame, "Signal loss", (200, 200),
				# 			cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
				cv2.putText(frame_empty, "Signal loss", (200, 200),
							cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
			# ******************************************************************
			# We send this frame to GazeTracking to analyze it
			self.gaze.refresh(frame)
			
			frame = self.gaze.annotated_frame()
			text = ""
			
			if self.gaze.is_blinking():
				text = "Blinking"
			elif self.gaze.is_right():
				text = "Looking right"
			elif self.gaze.is_left():
				text = "Looking left"
			elif self.gaze.is_center():
				text = "Looking center"
			
			cv2.putText(frame_empty, text, (250, 250), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 2)
			
			left_pupil = self.gaze.pupil_left_coords()
			right_pupil = self.gaze.pupil_right_coords()
			cv2.putText(frame_empty, "Left pupil:  " + str(left_pupil), (250, 280), cv2.FONT_HERSHEY_DUPLEX, 0.5,
						(147, 58, 31), 1)
			cv2.putText(frame_empty, "Right pupil: " + str(right_pupil), (250, 310), cv2.FONT_HERSHEY_DUPLEX, 0.5,
						(147, 58, 31), 1)
			
			# ********************************************************************
			# show the frame
			# cv2.imshow("Frame", frame)
			cv2.imshow("Frame", frame_empty)
			key = cv2.waitKey(1) & 0xFF
			
			self.pass_variable = np.array(1)
			
			try:
				self._listener(self.pass_variable)
			except Exception:
				# ignore errors when no listener has been registered
				pass
			
			# if the `q` key was pressed, break from the loop
			if key == ord("q"):
				break
		# do a bit of cleanup
		cv2.destroyAllWindows()
		# self.cap.stop()
	
	def set_listener(self, listener):
		self._listener = listener
示例#25
0
class CMate:
    def __init__(self, source_img, dest_img):
        self.source_img = source_img
        self.dest_img = dest_img
        self.dest_pose_estimator = PoseEstimator(cv.imread(self.dest_img))
        self.source_pose_estimator = None  # initialize after cloth extraction
        self.error_list = []

    def cloth_segmentation(self):
        # extract source image and segmented cloth
        try:
            source_img, source_seg = cloth_extractor.extract_cloth(
                self.source_img)

            source_img = cv.cvtColor(source_img, cv.COLOR_RGB2BGR)
            source_seg = cv.cvtColor(source_seg, cv.COLOR_RGB2BGR)
            # print(source_img.shape, source_seg.shape)
            # fill holes
            source_seg = utils.fill_holes(source_img, source_seg)

            return source_img, source_seg
        except Exception:
            raise Exception("Source image without cloth.")

    def get_source_shoulder_details(self, source_img, cloth_seg):
        "get source shoulder distance and rotation angle"

        try:
            # initialize source pose estimator
            self.source_pose_estimator = PoseEstimator(source_img)
            source_distance = self.source_pose_estimator.get_shoulder_details()
            source_points = self.source_pose_estimator.get_shoulder_points()
        except Exception as e:
            print(str(e))
            # if no source shoulder points detected
            self.error_list.append("Issue in source image:" + str(e))
            logging.warning(
                "Using manual shoulder detection for source image.")
            source_points, source_distance = custom_shoulder_locator.get_shoulder_details_mannual(
                cloth_seg)

        return source_points, source_distance

    def apply_cloth(self):
        # step 1: get dest shoulder distance and rotation angle
        try:
            dest_distance = self.dest_pose_estimator.get_shoulder_details()
        except Exception as e:
            raise Exception("Issue in profile image:" + str(e))
        # step 2: get source image and segmented cloth
        source_img, source_seg = self.cloth_segmentation()

        # cv.imwrite("testseg.jpg", source_seg)

        # step 3: get source shoulder distance and rotation angle
        source_points, source_distance = self.get_source_shoulder_details(
            source_img, source_seg)

        # step 4: resize source seg and shoulder points
        if dest_distance < MIN_SHOULDER_DISTANCE:
            raise Exception("Shoulder detection issue in profile image.")
        if source_distance < MIN_SHOULDER_DISTANCE:
            raise Exception("Shoulder detection issue in source image.")
        resize_factor = dest_distance / source_distance
        print("resize factor:", resize_factor)

        source_seg = cv.resize(source_seg,
                               (int(source_seg.shape[1] * resize_factor),
                                int(source_seg.shape[0] * resize_factor)))

        source_points[0] = utils.resize_shoulder_coord(source_points[0],
                                                       resize_factor)
        source_points[1] = utils.resize_shoulder_coord(source_points[1],
                                                       resize_factor)
        """
        # step 5: rotate source seg and shoulder points
        rotation_angle = dest_angle - source_angle
        # clip angle between [-10,10]
        if abs(rotation_angle) > 5:
            self.error_list.append("Source Image is rotated: %f" % rotation_angle)
        rotation_angle = max(-10, min(rotation_angle, 10))
        print("rotation angle:", rotation_angle)
        rotated_seg = imutils.rotate(source_seg, rotation_angle)
        source_points = utils.rotate_shoulder_points(source_seg, source_points,
                                                     rotation_angle)
        """

        # step 5.2: remove border
        _, source_seg = utils.remove_segmentation_border(source_seg)

        # step 6: blend dest image and extracted cloth
        dest_frame = cv.imread(self.dest_img)
        dest_points = self.dest_pose_estimator.get_shoulder_points()
        try:
            final_img = utils.blend_images(source_seg, source_points,
                                           dest_frame, dest_points)
        except AssertionError:
            print("Assertion Error in blending images.")
            raise Exception("Issue in blending Images.")
        except Exception as e:
            print(str(e))
            raise Exception("Issue in blending Images.")
        return final_img, self.error_list
示例#26
0
def main(strargument):
    if os.path.isdir("./test"):
        shutil.rmtree("./test")
    os.mkdir("./test")
    if os.path.isfile("result.txt"):
        os.remove("result.txt")
    f = open("result.txt", "a")
    cap = cv2.VideoCapture(strargument)
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap = cv2.VideoCapture("NTQ.mkv")
    #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi")
    #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg")
    # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview")
    success, frame = cap.read()
    startId = countIdFolder("./face_db/")
    # quit if unable to read the video file
    if not success:
        print('Failed to read video')
        sys.exit(1)
    #The color of the rectangle we draw around the face
    rectangleColor = (0, 165, 255)
    #variables holding the current frame number and the current faceid
    frameCounter = 0
    currentFaceID = 0
    #Variables holding the correlation trackers and the name per faceid
    conf = configparser.ConfigParser()
    conf.read("config/main.cfg")
    mtcnn_detector = load_mtcnn(conf)
    MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH")
    VERIFICATION_THRESHOLD = float(
        conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD"))
    FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH")
    BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH"))
    MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE"))
    MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL"))
    YAWL = int(conf.get("CUSTOM", "YAWL"))
    YAWR = int(conf.get("CUSTOM", "YAWR"))
    PITCHL = int(conf.get("CUSTOM", "PITCHL"))
    PITCHR = int(conf.get("CUSTOM", "PITCHR"))
    ROLLL = int(conf.get("CUSTOM", "ROLLL"))
    ROLLR = int(conf.get("CUSTOM", "ROLLR"))
    MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED"))
    IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH"))
    EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y"))
    EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X"))
    SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH"))
    MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN"))
    MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE"))
    LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME"))
    ROIXL = int(conf.get("CUSTOM", "ROIXL"))
    ROIXR = int(conf.get("CUSTOM", "ROIXR"))
    ROIYB = int(conf.get("CUSTOM", "ROIYB"))
    ROIYA = int(conf.get("CUSTOM", "ROIYA"))
    maxDisappeared = MAXDISAPPEARED  ## drop a track after this many frames without a match
    faces_db = load_faces(FACE_DB_PATH, mtcnn_detector)
    # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector)
    time.sleep(10)
    for item in faces_db:
        print(item["name"])
    listTrackedFace = []
    mark_detector = MarkDetector()
    tm = cv2.TickMeter()
    _, sample_frame = cap.read()
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    with tf.Graph().as_default():
        with tf.Session() as sess:
            load_mobilefacenet(MODEL_PATH)
            inputs_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            try:
                start = time.time()
                while True:
                    start1 = time.time()
                    retval, frame = cap.read()

                    #Increase the framecounter
                    frameCounter += 1
                    if retval:
                        _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR]
                        cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB),
                                      (0, 0, 255), 2)
                        good_face_index = []
                        # faces_db = load_face_db.face_db
                        if (frameCounter % 1) == 0:  # process every frame; raise the modulus to skip frames
                            ### embed and compare name
                            # drop entries whose face_db folder has been removed;
                            # rebuild the list instead of popping while iterating
                            faces_db = [
                                face_db for face_db in faces_db
                                if os.path.isdir("./face_db/" +
                                                 face_db["name"].split("_")[0])
                            ]
                            faces, landmarks = mtcnn_detector.detect(_frame)
                            if faces.shape[0] != 0:
                                input_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                save_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                (yaw, pitch, roll) = (0, 0, 0)
                                for i, face in enumerate(faces):
                                    if round(faces[i, 4], 6) > IS_FACE_THRESH:
                                        bbox = faces[i, 0:4]
                                        points = landmarks[i, :].reshape(
                                            (5, 2))
                                        nimg = face_preprocess.preprocess(
                                            _frame,
                                            bbox,
                                            points,
                                            image_size='112,112')
                                        save_images[i, :] = nimg
                                        nimg = nimg - 127.5
                                        nimg = nimg * 0.0078125
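                                        # the two lines above map pixel values
                                        # to roughly [-1, 1]: subtract the
                                        # mid-value 127.5, then scale by 1/128
                                        # (0.0078125), the usual MobileFaceNet
                                        # input normalization.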
                                        input_images[i, :] = nimg
                                        (x1, y1, x2, y2) = bbox.astype("int")
                                        if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2:
                                            continue
                                        temp = int((y2 - y1) / EXTEND_Y)
                                        y1 = y1 + temp
                                        y2 = y2 + temp
                                        temp = int((x2 - x1) / EXTEND_X)
                                        if x1 > temp:
                                            x1 = x1 - temp
                                        x2 = x2 + temp
                                        # cv2.imshow("mainframe",frame)
                                        # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2])
                                        face_img = cv2.resize(
                                            _frame[y1:y2, x1:x2], (128, 128))
                                        # cv2.imshow("ok",face_img)
                                        face_img = cv2.cvtColor(
                                            face_img, cv2.COLOR_BGR2RGB)
                                        tm.start()
                                        marks = mark_detector.detect_marks(
                                            [face_img])
                                        tm.stop()
                                        marks *= (x2 - x1)
                                        marks[:, 0] += x1
                                        marks[:, 1] += y1
                                        # mark_detector.draw_marks(
                                        #         frame, marks, color=(0, 255, 0))
                                        pose, (
                                            yaw, pitch, roll
                                        ) = pose_estimator.solve_pose_by_68_points(
                                            marks)
                                        # temp = frame
                                        # cv2.putText(temp,"yaw:  "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"roll:   "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.imshow("frame",temp)
                                        # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR):
                                        #     good_face_index.append(i)
                                        # cv2.waitKey(0)
                                # print(good_face_index)
                                feed_dict = {inputs_placeholder: input_images}
                                emb_arrays = sess.run(embeddings,
                                                      feed_dict=feed_dict)
                                emb_arrays = sklearn.preprocessing.normalize(
                                    emb_arrays)
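                                # L2-normalizing the embeddings lets
                                # feature_compare presumably score faces by
                                # cosine similarity (a dot product of unit
                                # vectors) against the database features.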
                                names = []
                                sims = []
                                for i, embedding in enumerate(emb_arrays):
                                    # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False:
                                    #     names.append(listTrackedFace[i].name)
                                    #     continue
                                    embedding = embedding.flatten()
                                    temp_dict = {}
                                    for com_face in faces_db:
                                        ret, sim = feature_compare(
                                            embedding, com_face["feature"],
                                            0.65)
                                        temp_dict[com_face["name"]] = sim
                                    # print(temp_dict)
                                    dictResult = sorted(temp_dict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    # print(dictResult[:5])
                                    name = ""
                                    if len(dictResult) > 0 and dictResult[0][
                                            1] > VERIFICATION_THRESHOLD:
                                        name = dictResult[0][
                                            0]  #.split("_")[0]
                                        sim = dictResult[0][1]
                                        ## write log
                                        t = time.time()
                                        f.write(name + "___" +
                                                str((t - start) // 60) + ":" +
                                                str(int(t - start) % 60) +
                                                "\n")
                                    else:
                                        name = "unknown"
                                        sim = 0
                                    names.append(name)
                                    sims.append(sim)

                                    # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:])
                                    # if len(dictResult)>0 :
                                    # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:])
                                    ################################ tracker
                                for i, embedding in enumerate(emb_arrays):
                                    embedding = embedding.flatten()
                                    ResultDict = {}
                                    for objectTrackFace in listTrackedFace:
                                        tempList = []
                                        (x1, y1, x2,
                                         y2) = objectTrackFace.latestBox
                                        for com_face in objectTrackFace.listEmbedding:
                                            ret, sim = feature_compare(
                                                embedding, com_face, 0.65)
                                            tempList.append(sim)
                                        tempList.sort(reverse=True)
                                        if len(tempList) > 0:
                                            if tempList[0] > 0.9 or (
                                                    similarIOU(
                                                        faces[i, :4].astype(
                                                            "int"),
                                                        objectTrackFace.
                                                        latestBox) and
                                                (frameCounter - objectTrackFace
                                                 .latestFrameCounter) < 3):
                                                ResultDict[objectTrackFace.
                                                           name] = tempList[0]
                                    dictResult = sorted(ResultDict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    if True:
                                        if len(ResultDict) > 0 and dictResult[0][
                                                1] > SIMILAR_THRESH:  ## if the best match passes the threshold -- 0.5
                                            # for ik in range(len(dict)):
                                            #     if dict[ik][1]>SIMILAR_THRESH:

                                            nameTrackCurrent = dictResult[0][0]
                                            for index, tempFaceTrack in enumerate(
                                                    listTrackedFace):
                                                if tempFaceTrack.name == nameTrackCurrent:
                                                    if len(tempFaceTrack.
                                                           listImage
                                                           ) > MAX_LIST_LEN:
                                                        tempFaceTrack.listImage.pop(
                                                            0)
                                                        tempFaceTrack.listEmbedding.pop(
                                                            0)
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    else:
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    if names[i] != "unknown":
                                                        if RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            tempFaceTrack.name = names[
                                                                i]
                                                        # else: #################
                                                        #     names[i] = nameTrackCurrent
                                                    else:
                                                        if not RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            names[
                                                                i] = nameTrackCurrent
                                                    tempFaceTrack.countDisappeared = 0
                                                    tempFaceTrack.latestBox = faces[
                                                        i, 0:4].astype("int")
                                                    tempFaceTrack.latestFrameCounter = frameCounter
                                                    tempFaceTrack.liveTime = 0
                                                    tempFaceTrack.justAdded = True  ## but we still act on it
                                                    break

                                        else:  ## no match: create a new track, but only keep the face image if it is good enough
                                            if len(ResultDict) > 0:
                                                print(dictResult[0][1])
                                            if names[i] != "unknown":
                                                newTrackFace = trackedFace(
                                                    names[i])
                                            else:
                                                newTrackFace = trackedFace(
                                                    str(currentFaceID))
                                                currentFaceID = currentFaceID + 1
                                            if measureGoodFace(
                                                    MIN_FACE_SIZE,
                                                    MAX_BLACK_PIXEL,
                                                    save_images[i, :], yaw,
                                                    pitch, roll, BLUR_THRESH,
                                                    YAWL, YAWR, PITCHL, PITCHR,
                                                    ROLLL, ROLLR):
                                                newTrackFace.listImage.append(
                                                    save_images[i, :])
                                                newTrackFace.listEmbedding.append(
                                                    emb_arrays[i])
                                            newTrackFace.latestBox = faces[
                                                i, 0:4].astype("int")
                                            newTrackFace.latestFrameCounter = frameCounter
                                            # print(newTrackFace.latestBox)
                                            newTrackFace.justAdded = True
                                            listTrackedFace.append(
                                                newTrackFace)  ## add list
                                ### disappeared
                                # iterate over a snapshot so tracks can be
                                # removed safely while looping
                                for trackFace in list(listTrackedFace):
                                    if trackFace.justAdded == False:
                                        trackFace.countDisappeared = trackFace.countDisappeared + 1
                                        trackFace.liveTime = trackFace.liveTime + 1
                                    else:
                                        trackFace.justAdded = False
                                    if trackFace.liveTime > LIVE_TIME:
                                        listTrackedFace.remove(trackFace)
                                        continue
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## drop the track if fewer than MIN_FACE_FOR_SAVE faces were collected
                                            trackFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        listTrackedFace.remove(trackFace)
                                for i, face in enumerate(faces):
                                    x1, y1, x2, y2 = faces[i][0], faces[i][
                                        1], faces[i][2], faces[i][3]
                                    x1 = max(int(x1), 0)
                                    y1 = max(int(y1), 0)
                                    x2 = min(int(x2), _frame.shape[1])
                                    y2 = min(int(y2), _frame.shape[0])
                                    cv2.rectangle(frame,
                                                  (x1 + ROIXL, y1 + ROIYA),
                                                  (x2 + ROIXL, y2 + ROIYA),
                                                  (0, 255, 0), 2)
                                    # if i in good_face_index:
                                    # if not RepresentsInt(names[i]):
                                    cv2.putText(frame, names[i].split("_")[0],
                                                (int(x1 / 2 + x2 / 2 + ROIXL),
                                                 int(y1 + ROIYA)),
                                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                (255, 255, 255), 2)
                            else:
                                # iterate over a snapshot so tracks can be
                                # removed safely while looping
                                for trackFace in list(listTrackedFace):
                                    trackFace.countDisappeared = trackFace.countDisappeared + 1
                                    trackFace.liveTime = trackFace.liveTime + 1
                                    if trackFace.liveTime > LIVE_TIME:
                                        listTrackedFace.remove(trackFace)
                                        continue
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## drop the track if fewer than MIN_FACE_FOR_SAVE faces were collected
                                            trackFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        listTrackedFace.remove(trackFace)
                            end = time.time()
                            cv2.putText(frame,
                                        "FPS: " + str(1 // (end - start1)),
                                        (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 0), 3)
                            cv2.putText(
                                frame, "Time: " + str((end - start) // 60) +
                                ":" + str(int(end - start) % 60), (200, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
                        cv2.imshow("frame", frame)
                        key = cv2.waitKey(30)
                        if key & 0xFF == ord('q'):
                            break
                        if key == 32:
                            cv2.waitKey(0)
                    else:
                        break
            except KeyboardInterrupt:
                pass
    gc.collect()
    f.close()
    cv2.destroyAllWindows()
    exit(0)
示例#28
0
    random.shuffle(results)
    return results

    return paths[best_option]


if __name__ == '__main__':
    if len(sys.argv) != 5:
        sys.exit('Requires a database, an embedder weights file, '
                 'a pose estimator weights file, and an input image path')

    database_file = sys.argv[1]
    facenet_protobuf = sys.argv[2]
    pose_weights = sys.argv[3]
    input_image_path = sys.argv[4]

    sqlite3.register_adapter(np.ndarray, adapt_array)
    sqlite3.register_converter("array", convert_array)
    conn = sqlite3.connect(database_file, detect_types=sqlite3.PARSE_DECLTYPES)
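    # The adapters registered above (adapt_array / convert_array are assumed
    # helpers defined elsewhere in this script) serialize numpy arrays to
    # sqlite BLOBs and back, so face embeddings round-trip through the
    # "array" column type.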

    embedder = Embedder(facenet_protobuf)
    pose_estimator = PoseEstimator(pose_weights)

    input_image = Image.open(input_image_path)
    input_image = crop_to_face(input_image)

    results = get_best_match(conn, embedder, pose_estimator, input_image)

    for item in results:
        print(item)


def main():
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()
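    # On Windows, multiprocessing spawns a fresh interpreter and must pickle
    # its arguments, which the detector presumably does not support, so a
    # daemon thread is used there instead of a separate process.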

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # fix facebox if needed
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    thread = threading.Thread(target=get_face,
                              args=(mark_detector, img_queue, box_queue))
    thread.daemon = True
    thread.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read a frame; crop and flip it to suit your needs.
        frame_got, frame = cap.read()
        if frame_got is False: