Example No. 1
    def get_all_poses(self, image, mark_detector=None, face_detector=None, draw=False, draw_marks=False):
        """
        Get all the poses in an image
        """
        CNN_INPUT_SIZE = 128

        print("Initializing detectors")
        # Introduce mark_detector to detect landmarks.
        if mark_detector is None:
            from .mark_detector import MarkDetector
            mark_detector = MarkDetector()
        if face_detector is None:
            from .mark_detector import FaceDetector
            face_detector = FaceDetector()

        confidences, faceboxes = face_detector.get_faceboxes(
            image, threshold=0.2)

        poses = []
        print("{} FACEBOXES".format(len(faceboxes)))
        for facebox in faceboxes:
            if min(facebox) < 0:
                continue
            # Detect landmarks from image of 128x128.
            face_img = image[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]

            if not face_img.shape[0] or not face_img.shape[1]:
                continue

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            if draw_marks:
                mark_detector.draw_marks(image, marks, color=(0, 0, 255))

            detected_face = image[facebox[1]: facebox[3],
                                  facebox[0]: facebox[2]]  # crop detected face
            detected_face = cv2.cvtColor(
                detected_face, cv2.COLOR_BGR2GRAY)  # transform to grayscale
            detected_face = cv2.resize(
                detected_face, (48, 48))  # resize to 48x48 (not used further in this snippet)

            # Try pose estimation with 68 points.
            pose = self.solve_pose_by_68_points(marks)
            #pose = pose_estimator.solve_pose(marks)

            poses.append((pose[0].copy(), pose[1].copy()))

            if draw:
                self.draw_annotation_box(
                    image, pose[0], pose[1], color=(255, 128, 128))

        return poses
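
A note on the landmark conversion used above (and repeated in the later examples): the CNN predicts marks normalized to the 128x128 face crop, so they are scaled by the facebox width and shifted by the box origin. This only works because the detector returns square boxes; the width is applied to both axes. A minimal standalone illustration with made-up numbers:

import numpy as np

# Illustrative only: map normalized crop landmarks to full-image pixels.
facebox = [200, 150, 328, 278]                 # x1, y1, x2, y2 (a square 128x128 box)
marks = np.array([[0.25, 0.5], [0.75, 0.5]])   # normalized (x, y) pairs from the CNN

marks *= (facebox[2] - facebox[0])  # scale by the box width (box assumed square)
marks[:, 0] += facebox[0]           # shift x by the box origin
marks[:, 1] += facebox[1]           # shift y by the box origin
print(marks)                        # [[232. 214.]
                                    #  [296. 214.]]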
Example No. 2
def webcam_main():
    print("Camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    time.sleep(2.0)

    mark_detector = MarkDetector()
    
    # loop over the frames from the video stream
    while True:
        _, frame = vs.read()
        start = cv2.getTickCount()

        frame = imutils.resize(frame, width=750, height=750)
        frame = cv2.flip(frame, 1)
        faceboxes = mark_detector.extract_cnn_facebox(frame)

        if faceboxes is not None:
            for facebox in faceboxes:
                # Detect landmarks from image of 64X64 with grayscale.
                face_img = frame[facebox[1]: facebox[3],
                                    facebox[0]: facebox[2]]
                # cv2.rectangle(frame, (facebox[0], facebox[1]), (facebox[2], facebox[3]), (0, 255, 0), 2)
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
                face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1)

                land_start_time = time.time()
                marks = mark_detector.detect_marks_keras(face_img0)
                # marks *= 255
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]
                # Draw Predicted Landmarks
                mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2)

        fps_time = (cv2.getTickCount() - start)/cv2.getTickFrequency()
        cv2.putText(frame, '%.1ffps'%(1/fps_time) , (frame.shape[1]-65,15), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0,255,0))
        # show the frame
        cv2.imshow("Frame", frame)
        # writer.write(frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # do a bit of cleanup
    cv2.destroyAllWindows()
    vs.release()
def main():
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 1)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # fix facebox if needed
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
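
Examples 2, 5, 6, and 7 all offload face detection to a `get_face` worker that is not shown. A minimal sketch of what such a worker must do, assuming the detector exposes the `extract_cnn_facebox` method seen in the other examples; the exact call and the shape of the result (a single box, a list of boxes, or a `(facebox, confidence)` tuple, depending on the example) are assumptions:

def get_face(detector, confidence_threshold, img_queue, box_queue):
    """Sketch of the face-detection worker: pull frames from img_queue and
    push one detection result per frame to box_queue. confidence_threshold
    would filter weak detections; how it is applied is an assumption."""
    while True:
        image = img_queue.get()
        result = detector.extract_cnn_facebox(image)  # assumed detector API
        box_queue.put(result)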
Example No. 4
def webcam_main():
    print("Camera sensor warming up...")
    cv2.namedWindow('face landmarks', cv2.WINDOW_NORMAL)
    frame = cv2.imread(IMAGE_PATH)  # despite the function name, this variant runs on a static image
    time.sleep(2.0)

    mark_detector = MarkDetector(current_model, CNN_INPUT_SIZE)
    ext = current_model.split(".")[-1]
    if ext == "pb":
        run_model = 0
    elif ext in ("hdf5", "h5"):
        run_model = 1
    else:
        print("Unsupported model format!")
        return

    faceboxes = mark_detector.extract_cnn_facebox(frame)
    print(faceboxes)

    if faceboxes is not None:
        facebox = faceboxes[0]

        # Detect landmarks from image of 64X64 with grayscale.
        face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
        cv2.rectangle(frame, (facebox[0], facebox[1]),
                      (facebox[2], facebox[3]), (0, 255, 0), 2)
        face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
        face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1)

        if run_model == 1:
            marks = mark_detector.detect_marks_keras(face_img0)
        else:
            marks = mark_detector.detect_marks_tensor(face_img0, 'input_2:0',
                                                      'output/BiasAdd:0')
        # marks *= 255
        marks *= facebox[2] - facebox[0]
        marks[:, 0] += facebox[0]
        marks[:, 1] += facebox[1]
        # Draw Predicted Landmarks
        mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2)

    # loop over the frames from the video stream
    while True:
        start = cv2.getTickCount()

        # frame = cv2.resize(frame, (750,750))
        # frame = cv2.flip(frame, 1)

        # fps_time = (cv2.getTickCount() - start)/cv2.getTickFrequency()
        # cv2.putText(frame, '%.1ffps'%(1/fps_time), (frame.shape[1]-65,15), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0,255,0))
        # show the frame
        cv2.imshow("face landmarks", frame)
        # writer.write(frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # do a bit of cleanup
    cv2.destroyAllWindows()
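
The extension test at the top of Example 4 can be expressed more compactly with pathlib; a small illustrative helper (the function name is mine, not from the original):

from pathlib import Path

def infer_run_model(model_path):
    """Map a model file extension to the run_model flag used in Example 4:
    0 for frozen TensorFlow graphs (.pb), 1 for Keras weights (.hdf5/.h5),
    None for unsupported formats."""
    suffix = Path(model_path).suffix.lower()
    if suffix == ".pb":
        return 0
    if suffix in (".hdf5", ".h5"):
        return 1
    return None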
Example No. 5
def single_main():
    """MAIN"""
    cam = cv2.VideoCapture(VIDEO_PATH)
    _, sample_frame = cam.read()

    # Load Landmark detector
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    while True:
        frame_got, frame = cam.read()
        if frame_got is False:
            break
        img_queue.put(frame)
        start = cv2.getTickCount()

        # Get face from box queue.
        faceboxes = box_queue.get()
        if faceboxes is not None:
            for facebox in faceboxes:
                # if facebox is not None:
                # Detect landmarks from image of 64x64.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                # cv2.rectangle(frame, (facebox[0], facebox[1]), (facebox[2], facebox[3]), (0, 255, 0), 2)
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
                face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE,
                                             1)
                marks = mark_detector.detect_marks_keras(
                    face_img0)  # landmark predictor
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Draw Predicted Landmarks
                mark_detector.draw_marks(frame, marks, color=(255, 255, 255))

        fps_time = (cv2.getTickCount() - start) / cv2.getTickFrequency()
        cv2.putText(frame, '%.1ffps' % (1 / fps_time),
                    (frame.shape[1] - 65, 15), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                    (127, 127, 127))
        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == ord('q'):
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    # out.release()
    cam.release()
    cv2.destroyAllWindows()
Example No. 6
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 1)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Show raw marks.
            mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
            right_corner = tuple([int(i) for i in marks[36]])
            left_corner = tuple([int(i) for i in marks[45]])
            # print(marks[36], marks[45])
            cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2)

            pixel_distance = int(
                math.sqrt((right_corner[0] - left_corner[0])**2 +
                          (right_corner[1] - left_corner[1])**2))
            estimated_distance = (real_width * focal_length) / pixel_distance

            cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stable pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

            # Draw head axes on frame.
            print(steady_pose[1])  # debug: stabilized translation vector
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            angles = process_eyes(frame, marks)
            if angles:
                # print(angles)
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
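
Example 6 estimates the camera-to-face distance from the pixel span between the outer eye corners (landmarks 36 and 45) via the pinhole model: distance = (real_width * focal_length) / pixel_distance. The `real_width` and `focal_length` globals it relies on are not shown; the focal length is typically calibrated once from a reference frame taken at a known distance. A sketch of that calibration with illustrative numbers (none of these values come from the original code):

KNOWN_DISTANCE_CM = 60.0    # camera-to-face distance in the reference frame
REAL_WIDTH_CM = 9.0         # real-world span between the outer eye corners
REFERENCE_PIXEL_WIDTH = 95  # measured pixel span in the reference frame

# Rearranged pinhole model: focal_length = (pixel_width * distance) / real_width
focal_length = (REFERENCE_PIXEL_WIDTH * KNOWN_DISTANCE_CM) / REAL_WIDTH_CM

def estimate_distance(pixel_width):
    # distance = (real_width * focal_length) / pixel_width
    return (REAL_WIDTH_CM * focal_length) / pixel_width

print(round(estimate_distance(95), 1))   # 60.0 -- recovers the calibration distance
print(round(estimate_distance(190), 1))  # 30.0 -- twice the pixel span, half the distance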
Example No. 7
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    #cam.set(3, 640)
    #cam.set(4, 480)

    sF = 1.05  # unused in this snippet
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    img_queue.put(sample_frame)
    thread = threading.Thread(target=get_face,
                              args=(mark_detector, img_queue, box_queue))
    thread.daemon = True
    thread.start()
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 1)

        # Convert to grayscale only after the read check, so a failed read
        # (frame is None) does not crash cvtColor. gray is unused below.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #    frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw stabilized pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               stabile_pose[0],
                                               stabile_pose[1],
                                               color=(128, 255, 128))

        # Show preview.
        cv2.imshow("input", cv2.resize(frame, (800, 600)))
        if cv2.waitKey(10) == 27:
            break

    # Clean up.
    cam.release()
    cv2.destroyAllWindows()
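
Most of these examples stabilize the raw pose with six scalar `Stabilizer` instances, one per pose value (three rotation components, three translation components), each configured with two states and one measurement, i.e. a value-plus-velocity Kalman filter. The `Stabilizer` implementation is not shown; a minimal sketch of such a scalar stabilizer built on `cv2.KalmanFilter`, under the assumption that this is roughly what the class wraps:

import cv2
import numpy as np

class ScalarStabilizer:
    """Minimal 2-state (value, velocity) / 1-measurement Kalman filter sketch."""

    def __init__(self, cov_process=0.1, cov_measure=0.1):
        self.filter = cv2.KalmanFilter(2, 1, 0)
        self.filter.transitionMatrix = np.array([[1., 1.], [0., 1.]], np.float32)
        self.filter.measurementMatrix = np.array([[1., 0.]], np.float32)
        self.filter.processNoiseCov = np.eye(2, dtype=np.float32) * cov_process
        self.filter.measurementNoiseCov = np.eye(1, dtype=np.float32) * cov_measure
        self.state = np.zeros((2, 1), np.float32)

    def update(self, measurement):
        self.filter.predict()
        self.state = self.filter.correct(
            np.array([[measurement]], np.float32))
        return self.state

# Usage mirroring the examples: one stabilizer per pose scalar.
stabilizers = [ScalarStabilizer() for _ in range(6)]
pose_np = np.array([0.1, 0.2, 0.3, 1.0, 2.0, 3.0])  # flattened (rvec, tvec)
steady = [stb.update(v)[0, 0] for stb, v in zip(stabilizers, pose_np)]
steady_pose = np.reshape(steady, (-1, 3))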
Example No. 8
class PoseHeadNode(object):
    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo, \
                                queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()

    def init(self):

        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(6)
        ]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):

        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x,
                                body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)

    def draw_pose(self, img, humans, parts=[4, 7]):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        image_h, image_w, _ = img.shape
        if parts is None or len(parts) == 0:
            return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        else:

            for human in humans:
                for part in parts:

                    if part in human.body_parts.keys():
                        body_part = human.body_parts[part]
                        coord = (int(body_part.x * image_w + 0.5), \
                                int(body_part.y * image_h + 0.5))
                        img = cv2.circle(img, coord, 2, (0, 255, 0))
            return img

    def color_callback(
        self, data
    ):  # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480, 640, 3))
        #print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def get_top_vertice_human(self, human, img_w, img_h):
        sorted_id = sorted(human["pose"].body_parts.keys())
        body_part = human["pose"].body_parts[sorted_id[0]]
        return (int(body_part.x * img_w + 0.5),
                int(body_part.y * img_h + 0.5))

    def is_gazing(self, face_direction, human, img):
        def get_angle_direct_with_peakhand(face_direction, peak):
            d0, d1 = face_direction
            peak_direction = (peak[0] - d0[0], peak[1] - d0[1])
            face_direct = (d1[0] - d0[0], d1[1] - d0[1])

            d_peak_direction = np.sqrt(peak_direction[0]**2 +
                                       peak_direction[1]**2)
            d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2)
            return (peak_direction[0]*face_direct[0] + \
                    peak_direction[1]*face_direct[1])/(d_peak_direction*\
                    d_face_direct)

        thresh = 0.1  #0.5 #cos(PI/3)

        image_h, image_w, _ = img.shape
        if 4 in human.body_parts.keys():
            body_part = human.body_parts[4]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle left = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        if 7 in human.body_parts.keys():
            body_part = human.body_parts[7]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle right = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        return False

    def get_gazing_status(self, face_directions, face_coords, humans, img):
        def get_dist_to_humans(human_coords, coord):
            result = []

            for _coord in human_coords:
                result.append(np.sqrt((coord[0] - _coord[0])**2 +
                                      (coord[1] - _coord[1])**2))
            return result

        image_h, image_w, _ = img.shape
        gazing_status = []

        human_coords = []

        for human in humans:

            sorted_id = sorted(human.body_parts.keys())
            body_part = human.body_parts[sorted_id[0]]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))

            human_coords.append(coord)

        for fdirection, fcoord in zip(face_directions, face_coords):
            dists = get_dist_to_humans(human_coords, fcoord)
            idx = np.argmin(dists)
            gazing_status.append(self.is_gazing(fdirection, humans[idx], img))
            del human_coords[idx]

        return gazing_status

    def publish_headpose_info(self, viz=True):

        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None or self.humans is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes = self.mark_detector.extract_cnn_facebox(frame)
        mess = "Not detect pose"
        face_directions = []
        face_coords = []

        if faceboxes is not None and len(faceboxes) > 0:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                #facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_coords.append((int((facebox[0]+facebox[2])/2), \
                                int((facebox[1]+facebox[3])/2)))
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw pose annotation on frame; the call returns the two
                # points that define the face direction.
                face_direction = self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
                face_directions.append(face_direction)
                # Uncomment following line to draw stable pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])
        gazing_status = self.get_gazing_status(face_directions, face_coords, \
                copy.deepcopy(self.humans), self.color_img)

        msg = GazingInfo()
        msg.is_gazing = []
        msg.coords = []

        for coord, gazestatus in zip(face_coords, gazing_status):
            _coord = Coord2D()
            _coord.x = coord[0]
            _coord.y = coord[1]
            msg.coords.append(_coord)
            msg.is_gazing.append(gazestatus)

        self.pub_headpose_info.publish(msg)

        print(gazing_status)
        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
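
`is_gazing` above scores alignment as the cosine of the angle between the face-direction vector (the two points returned by `draw_annotation_box`) and the vector from the face to a hand peak, accepting values above a threshold. The same computation in a compact standalone form (illustrative numbers only):

import numpy as np

def gaze_alignment(face_direction, peak):
    """Cosine between the face direction and the face-to-peak direction.
    face_direction is a pair of 2D points (d0, d1); peak is the hand-peak
    coordinate. Returns a value in [-1, 1]."""
    d0, d1 = np.asarray(face_direction, dtype=float)
    peak = np.asarray(peak, dtype=float)
    face_vec = d1 - d0
    peak_vec = peak - d0
    return float(np.dot(face_vec, peak_vec) /
                 (np.linalg.norm(face_vec) * np.linalg.norm(peak_vec)))

# A face looking along +x toward a peak slightly off-axis:
print(round(gaze_alignment(((0, 0), (10, 0)), (20, 5)), 3))  # 0.97 -> above the 0.1 threshold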
Example No. 9
class PoseHeadNode(object):

    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        #self.pub_headpose_info = rospy.Publisher("/headpose_info", HeadPoseInfo, \
        #                        queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()
        
    
    def init(self):
        
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
                state_num=2,
                measure_num=1,
                cov_process=0.1,
                cov_measure=0.1) for _ in range(6)]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #    
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))
    
    def pose_cb(self, data):
        
        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)
    
    def draw_pose(self, img, humans):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)

    def color_callback(self, data): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480,640, 3))
        print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def publish_headpose_info(self, viz=True):
        
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480*2, 640*2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes, landmark = self.detector.detect(frame, \
                thresh, scales=scales, do_flip=False)
        mess = "Not detect pose"
        
        if faceboxes is not None and len(faceboxes) > 0:
            #if isinstance(faceboxes[1], int):
            #    faceboxes = [faceboxes]
            for facebox in faceboxes:
                facebox = facebox.astype(int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                                facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Show raw marks.
                self.mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw pose annotation on frame.
                self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following line to draw stable pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
                
                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)