def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    if args.out != None:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    tm = cv2.TickMeter()

    cnt = 0

    input_path = args.input_path
    listdir = os.listdir(input_path)
    for v_name in listdir:
        v_path = os.path.join(input_path, v_name)
        cap = cv2.VideoCapture(v_path)

        while True:
            # Read frame, crop it, flip it, suits your needs.
            frame_got, frame = cap.read()
            if frame_got is False:
                break

            # Crop it if frame is larger than expected.
            # frame = frame[0:480, 300:940]

            # If frame comes from webcam, flip it so it looks like a mirror.
            if video_src == 0:
                frame = cv2.flip(frame, 2)

            # Pose estimation by 3 steps:
            # 1. detect face;
            # 2. detect landmarks;
            # 3. estimate pose

            # Feed frame to image queue.
            img_queue.put(frame)

            # Get face from box queue.
            facebox = box_queue.get()

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                           facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks(face_img)
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                pose_estimator.draw_annotation_box(
                    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Show preview.
            # cv2.imshow("Preview", frame)
            # if cv2.waitKey(10) == 27:
            #     break
            if args.out != None:
                output_movie.write(frame)
            else:
                cv2.imshow("Preview", frame)

            cnt = cnt + 1
            if cnt % 100 == 0:
                print(str(cnt), flush=True)

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
def main():
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # fix facebox if needed
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
                              [355.48022, 222.92123], [342.7749, 221.94604]])
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
示例#4
0
def main():
    #bagreader = BagFileReader(args.video, 640,480,848,480,30,30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)
    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps,
                          (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:

        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if frame is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)

        mess = "Not detect pose"

        if faceboxes is not None:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(
                #     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                pose_estimator.draw_annotation_box(frame,
                                                   pose[0],
                                                   pose[1],
                                                   color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame,
                    "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0),
                    thickness=2)
        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#5
0
def main():
    """MAIN"""
    cv2.namedWindow("Test")  # Create a named window
    cv2.moveWindow("Test", 900, 600)  # Move it to (40,30)

    screenWidth, screenHeight = pyautogui.size()
    st = 'Last command'

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Setting up process for listening to audio commands
    voice_command_queue = Q()
    stt_process = Thread(target=get_voice_command,
                         args=(voice_command_queue, ))
    stt_process.setDaemon(True)
    stt_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(255, 128, 128))

            # Uncomment following line to draw head axes on frame.
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])

            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]

            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)

            # print(xpos, ypos)

            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():

                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'

                print(command)

        cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)
        scale_percent = 30
        # calculate the 50 percent of original dimensions
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)

        # dsize
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)

        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            pose_estimator.draw_annotation_box(
                frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#7
0
def process_image(transform, processing_model, img):
    global mark_detector, box_process, img_queue, box_queue, pose_estimator, pose_stabilizers, tm, width, height
    tracks = []
    try:
        frame = img
        h, w, d = frame.shape
        if pose_estimator is None or w != width or h != height:
            # sample_frame = frame
            # img_queue.put(sample_frame)

            # Introduce pose estimator to solve pose. Get one frame to setup the
            # estimator according to the image size.
            height, width = h, w
            # (height, width) = (1062 , 485)
            # (height, width) = (720 , 1280)
            pose_estimator = PoseEstimator(img_size=(height, width))

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
        img = frame
    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("HandPose Exception", e)
        pass

    return tracks, img
示例#8
0
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the locations from local face area to the global image.
            marks *= (x2 - x1)
            marks[:, 0] += x1
            marks[:, 1] += y1

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # All done. The best way to show the result would be drawing the
            # pose on the frame in realtime.

            # Do you want to see the pose annotation?
            pose_estimator.draw_annotation_box(
                frame, pose[0], pose[1], color=(0, 255, 0))

            # Do you want to see the head axes?
            # pose_estimator.draw_axes(frame, pose[0], pose[1])

            # Do you want to see the marks?
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Do you want to see the facebox?
            # mark_detector.draw_box(frame, [facebox])

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(1) == 27:
            break
示例#9
0
class PoseHeadNode(object):
    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo, \
                                queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()

    def init(self):

        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(6)
        ]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):

        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x,
                                body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)

    def draw_pose(self, img, humans, parts=[4, 7]):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        image_h, image_w, _ = img.shape
        if parts is None or len(parts) == 0:
            return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        else:

            for human in humans:
                for part in parts:

                    if part in human.body_parts.keys():
                        body_part = human.body_parts[part]
                        coord = (int(body_part.x * image_w + 0.5), \
                                int(body_part.y * image_h + 0.5))
                        img = cv2.circle(img, coord, 2, (0, 255, 0))
            return img

    def color_callback(
        self, data
    ):  # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480, 640, 3))
        #print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def get_top_vertice_human(self, human, img_w, img_h):
        sorted_id = sorted(human["pose"].body_parts.keys())
        body_part = human["pose"].body_parts[sorted_id[0]]
        return (int(body_part.x * image_w + 0.5), \
                                int(body_part.y * image_h + 0.5))

    def is_gazing(self, face_direction, human, img):
        def get_angle_direct_with_peakhand(face_direction, peak):
            d0, d1 = face_direction
            peak_direction = (peak[0] - d0[0], peak[1] - d0[1])
            face_direct = (d1[0] - d0[0], d1[1] - d0[1])

            d_peak_direction = np.sqrt(peak_direction[0]**2 +
                                       peak_direction[1]**2)
            d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2)
            return (peak_direction[0]*face_direct[0] + \
                    peak_direction[1]*face_direct[1])/(d_peak_direction*\
                    d_face_direct)

        thresh = 0.1  #0.5 #cos(PI/3)

        image_h, image_w, _ = img.shape
        if 4 in human.body_parts.keys():
            body_part = human.body_parts[4]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle left = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        if 7 in human.body_parts.keys():
            body_part = human.body_parts[7]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle right = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        return False

    def get_gazing_status(self, face_directions, face_coords, humans, img):
        def get_dist_to_humans(human_coords, coord):
            result = []

            for _coord in human_coords:
                result.append(np.sqrt((coord[0]-_coord[0])**2) + \
                        (coord[1]-_coord[1])**2)
            return result

        image_h, image_w, _ = img.shape
        gazing_status = []

        human_coords = []

        for human in humans:

            sorted_id = sorted(human.body_parts.keys())
            body_part = human.body_parts[sorted_id[0]]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))

            human_coords.append(coord)

        for fdirection, fcoord in zip(face_directions, face_coords):
            dists = get_dist_to_humans(human_coords, fcoord)
            idx = np.argmin(dists)
            gazing_status.append(self.is_gazing(fdirection, humans[idx], img))
            del human_coords[idx]

        return gazing_status

    def publish_headpose_info(self, viz=True):

        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None or self.humans is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes = self.mark_detector.extract_cnn_facebox(frame)
        mess = "Not detect pose"
        face_directions = []
        face_coords = []

        if faceboxes is not None and len(faceboxes) > 0:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                #facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_coords.append((int((facebox[0]+facebox[2])/2), \
                                int((facebox[1]+facebox[3])/2)))
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                face_direction = self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
                face_directions.append(face_direction)
                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])
        gazing_status = self.get_gazing_status(face_directions, face_coords, \
                copy.deepcopy(self.humans), self.color_img)

        msg = GazingInfo()
        msg.is_gazing = []
        msg.coords = []

        for coord, gazestatus in zip(face_coords, gazing_status):
            _coord = Coord2D()
            _coord.x = coord[0]
            _coord.y = coord[1]
            msg.coords.append(_coord)
            msg.is_gazing.append(gazestatus)

        self.pub_headpose_info.publish(msg)

        print(gazing_status)
        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
示例#10
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        engine.say(
            "Warning: video source not assigned, default webcam will be used")
        engine.runAndWait()
        video_src = 0
    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    gaze = GazeTracking()
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()
    head_flag = 0
    gaze_flag = 0
    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]
        #audio_record(AUDIO_OUTPUT, 3)
        #sphinx_recog(AUDIO_OUTPUT)
        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()
        gaze.refresh(frame)
        frame = gaze.annotated_frame()
        text = ""
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            gray = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            rects = detector(gray, 0)
            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()
            #
            for rect in rects:
                # determine the facial landmarks for the face region, then
                # convert the facial landmark (x, y)-coordinates to a NumPy
                # array
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(
                    shape)  #converting to NumPy Array矩阵运算
                mouth = shape[Start:End]
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)  #眼睛长宽比
                rightEAR = eye_aspect_ratio(rightEye)
                ear = (leftEAR + rightEAR) / 2.0  #长宽比平均值
                lipdistance = lip_distance(shape)

                if (lipdistance > YAWN_THRESH):
                    #print(lipdistance)
                    flag0 += 1
                    print("yawning time: ", flag0)
                    if flag0 >= 40:
                        cv2.putText(frame, "Yawn Alert", (10, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        cv2.putText(frame, "Yawn Alert", (220, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        engine.say("don't yawn")
                        engine.runAndWait()
                        flag0 = 0
                else:
                    flag0 = 0

                if (ear < thresh):
                    flag += 1
                    print("eyes closing time: ", flag)
                    if flag >= frame_check:
                        cv2.putText(frame,
                                    "****************ALERT!****************",
                                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2)
                        cv2.putText(frame,
                                    "****************ALERT!****************",
                                    (10, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2)
                        engine.say("open your eyes")
                        engine.runAndWait()
                        flag = 0
                else:
                    flag = 0
                if gaze.is_right():
                    print("Looking right")
                    text = "Looking right"
                elif gaze.is_left():
                    print("Looking left")
                    text = "Looking left"
                elif gaze.is_up():
                    text = "Looking up"
                else:
                    text = "Looking center"
                if text is not "Looking center":
                    gaze_flag += 1
                    if gaze_flag >= 20:
                        engine.say("look forward")
                        engine.runAndWait()
                        gaze_flag = 0
                else:
                    gaze_flag = 0
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
            # get angles
            angles = pose_estimator.get_angles(pose[0], pose[1])
            if ((-8 > angles[0] or angles[0] > 8)
                    or (-8 > angles[1] or angles[1] > 8)):
                head_flag += 1
                if head_flag >= 40:
                    print(angles[0])
                    engine.say("please look ahead")
                    engine.runAndWait()
            else:
                head_flag = 0
            # pose_estimator.draw_info(frame, angles)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               pose[0],
                                               pose[1],
                                               color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
            #pose_estimator.show_3d_model

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main(video_src):
    """MAIN
    
    :param video_src: Source of video to analyze
    :type video_src: str or int"""
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    face_detector = FaceDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,))
    #box_process = Process(target=get_faces, args=(face_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    #face expression recognizer initialization
    from keras.models import model_from_json
    model = model_from_json(open("./model/facial_expression_model_structure.json", "r").read())
    model.load_weights('./model/facial_expression_model_weights.h5') #load weights

    #-----------------------------

    emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

    while True:
        input()
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.

        #facebox = box_queue.get()

        faceboxes = dump_queue(box_queue)
        print("{} FACEBOXES".format(len(faceboxes)))
        for facebox in faceboxes:
            if min(facebox) < 0:
                continue
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]

            if not face_img.shape[0] or not face_img.shape[1]:
                continue

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            detected_face = frame[facebox[1]: facebox[3],facebox[0]: facebox[2]] #crop detected face
            detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY) #transform to gray scale
            detected_face = cv2.resize(detected_face, (48, 48)) #resize to 48x48



            # emotion estimation
            img_pixels = image.img_to_array(detected_face)
            img_pixels = np.expand_dims(img_pixels, axis = 0)

            img_pixels /= 255 #pixels are in scale of [0, 255]. normalize all pixels in scale of [0, 1]

            predictions = model.predict(img_pixels) #store probabilities of 7 expressions

            #find max indexed array 0: angry, 1:disgust, 2:fear, 3:happy, 4:sad, 5:surprise, 6:neutral
            max_index = np.argmax(predictions[0])

            emotion = emotions[max_index]

            #write emotion text above rectangle

            #V_FONT_HERSHEY_SIMPLEX normal size sans-serif font
            #CV_FONT_HERSHEY_PLAIN small size sans-serif font
            #CV_FONT_HERSHEY_DUPLEX normal size sans-serif font (more complex than CV_FONT_HERSHEY_SIMPLEX )
            #CV_FONT_HERSHEY_COMPLEX normal size serif font
            #CV_FONT_HERSHEY_TRIPLEX normal size serif font (more complex than CV_FONT_HERSHEY_COMPLEX )
            #CV_FONT_HERSHEY_COMPLEX_SMALL smaller version of CV_FONT_HERSHEY_COMPLEX
            #CV_FONT_HERSHEY_SCRIPT_SIMPLEX hand-writing style font
            #CV_FONT_HERSHEY_SCRIPT_COMPLEX more complex variant of CV_FONT_HERSHEY_SCRIPT_SIMPLEX

            image_text = ""
            for index in range(len(emotions)):
                if predictions[0][index]>0.3:
                    image_text += "{0} : {1} %\n".format(emotions[index], int(predictions[0][index]*100))

            space = 0
            for text in image_text.strip().split("\n"):
                cv2.putText(
                img = frame,
                text= text,
                org =(int(facebox[0]), int(facebox[1])-space),
                fontFace = cv2.FONT_HERSHEY_PLAIN,
                fontScale = 0.8,
                color   =  (255,255,255),
                thickness = 1
                )
                space += int(0.25*48)



            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
            #pose = pose_estimator.solve_pose(marks)


            # Stabilize the pose.
            #stabile_pose = []
            #pose_np = np.array(pose).flatten()
            #for value, ps_stb in zip(pose_np, pose_stabilizers):
            #    ps_stb.update([value])
            #    stabile_pose.append(ps_stb.state[0])
            #stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            pose_estimator.draw_annotation_box(
                 frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))


        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#12
0
class HeadPoseEstimator():
    def __init__(self):
        print "OpenCV version: {}".format(cv2.__version__)
        self.CNN_INPUT_SIZE = 128

        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))

        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(
                                       self.mark_detector,
                                       self.img_queue,
                                       self.box_queue,
                                   ))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool,
            self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name",
                                            "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image,
                                               self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param(
            "show_annotation_box", True)
        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)

        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)

        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()

    def get_face(self, detector, img_queue, box_queue):
        """
        画像キューから顔を取り出します。
        処理はマルチプロセッシングで行います。
        """
        while not rospy.is_shutdown():
            image = img_queue.get()
            box = detector.extract_cnn_facebox(image)
            box_queue.put(box)

    def process_ctrl_callback(self, msg):
        self.process_ctrl_flag = msg.data

    def euler_to_quaternion(self, euler):
        # オイラー角からクォータニオンに変換
        # 鏡写しになるように値を調整
        q = tf.transformations.quaternion_from_euler(euler.x, euler.y, euler.z)
        return Quaternion(x=q[1], y=-q[2], z=-q[3], w=q[0])
        #return Quaternion(x=q[0], y=q[1], z=q[2], w=q[3])

    def img_callback(self, img_msg):
        if self.process_ctrl_flag == True:
            try:
                cv_img = CvBridge().imgmsg_to_cv2(img_msg, "bgr8")
                flip_frame = cv2.flip(cv_img, 2)  # 画像を鏡写しにする
                frame = cv_img

                img_height, img_width, _ = frame.shape[:3]

                self.img_queue.put(frame)

                facebox = self.box_queue.get()

                # 顔の検出に成功
                if facebox is not None:
                    # 画像から顔をトリミングしてCNNでlandmarksを見つける
                    # img[top : bottom, left : right]
                    face_img = frame[facebox[1]:facebox[3],
                                     facebox[0]:facebox[2]]
                    face_img = cv2.resize(
                        face_img, (self.CNN_INPUT_SIZE, self.CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    face_rect = FaceRect()
                    face_rect.x = facebox[0]
                    face_rect.y = facebox[1]
                    face_rect.width = facebox[2] - facebox[0]
                    face_rect.height = facebox[3] - facebox[1]

                    #face_rect.x = img_width - facebox[0] - (facebox[2] - facebox[0])
                    #face_rect.y = facebox[1]
                    #face_rect.width = facebox[2] - facebox[0]
                    #face_rect.height = facebox[3] - facebox[1]

                    marks = self.mark_detector.detect_marks([face_img])

                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Uncomment following line to show raw marks.
                    #self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                    # Uncomment following line to show facebox.
                    #self.mark_detector.draw_box(frame, [facebox])

                    pose = self.pose_estimator.solve_pose_by_68_points(marks)
                    #rotation = pose[0]
                    #euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    #quaternion = self.euler_to_quaternion(euler)

                    # stabilize the pose
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    rotation = steady_pose[0]
                    euler = Vector3(x=rotation[0],
                                    y=rotation[1],
                                    z=rotation[2])
                    head_rotation = self.euler_to_quaternion(euler)
                    #print head_rotation
                    #print "---"

                    head_pose = HeadPose()
                    head_pose.face_rect = face_rect
                    head_pose.head_rotation = head_rotation
                    self.result_publisher.publish(head_pose)

                    # Uncomment following line to draw pose annotation on frame.
                    #self.pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128))

                    # Uncomment following line to draw stabile pose annotation on frame.
                    if self.show_annotation_box_flag == True:
                        self.pose_estimator.draw_annotation_box(frame,
                                                                steady_pose[0],
                                                                steady_pose[1],
                                                                color=(128,
                                                                       255,
                                                                       128))

                    # Uncomment following line to draw head axes on frame.
                    # openCV3.4.0では動かない
                    #self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

                    if self.show_axis_flag == True:
                        self.pose_estimator.draw_axis(frame, steady_pose[0],
                                                      steady_pose[1])

                if self.show_result_img_flag == True:
                    cv2.imshow("result", frame)
                    cv2.waitKey(1)

                if self.pub_result_img_flag == True:
                    try:
                        pub_img = CvBridge().cv2_to_imgmsg(frame, "bgr8")
                        self.img_publisher.publish(pub_img)

                    except CvBridgeError, e:
                        rospy.logerr(str(e))

            except CvBridgeError, e:
                rospy.logerr(str(e))
示例#13
0
class PoseHeadNode(object):

    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        #self.pub_headpose_info = rospy.Publisher("/headpose_info", HeadPoseInfo, \
        #                        queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()
        
    
    def init(self):
        
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
                state_num=2,
                measure_num=1,
                cov_process=0.1,
                cov_measure=0.1) for _ in range(6)]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #    
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))
    
    def pose_cb(self, data):
        
        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)
    
    def draw_pose(self, img, humans):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)

    def color_callback(self, data): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480,640, 3))
        print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def publish_headpose_info(self, viz=True):
        
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480*2, 640*2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes, landmark = self.detector.detect(frame, \
                thresh, scales=scales, do_flip=False)
        mess = "Not detect pose"
        
        if faceboxes is not None and len(faceboxes) > 0:
            #if isinstance(faceboxes[1], int):
            #    faceboxes = [faceboxes]
            for facebox in faceboxes:
                facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                                facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                self.mark_detector.draw_marks(
                     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess=round(1/(t2-t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
                
                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)