示例#1
0
    def get_all_poses(self, image, mark_detector=None, face_detector=None, draw=False, draw_marks=False):
        """
        Get all the poses in an image
        """
        CNN_INPUT_SIZE = 128

        print("Initializing detectors")
        # Introduce mark_detector to detect landmarks.
        if mark_detector is None:
            from .mark_detector import MarkDetector
            mark_detector = MarkDetector()
        if face_detector is None:
            from .mark_detector import FaceDetector
            face_detector = FaceDetector()

        confidences, faceboxes = face_detector.get_faceboxes(
            image, threshold=0.2)

        poses = []
        print("{} FACEBOXES".format(len(faceboxes)))
        for facebox in faceboxes:
            if min(facebox) < 0:
                continue
            # Detect landmarks from image of 128x128.
            face_img = image[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]

            if not face_img.shape[0] or not face_img.shape[1]:
                continue

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            if draw_marks:
                mark_detector.draw_marks(image, marks, color=(0, 0, 255))

            detected_face = image[facebox[1]: facebox[3],
                                  facebox[0]: facebox[2]]  # crop detected face
            detected_face = cv2.cvtColor(
                detected_face, cv2.COLOR_BGR2GRAY)  # transform to gray scale
            detected_face = cv2.resize(
                detected_face, (48, 48))  # resize to 48x48

            # Try pose estimation with 68 points.
            pose = self.solve_pose_by_68_points(marks)
            #pose = pose_estimator.solve_pose(marks)

            poses.append((pose[0].copy(), pose[1].copy()))

            if draw:
                self.draw_annotation_box(
                    image, pose[0], pose[1], color=(255, 128, 128))

        return poses
示例#2
0
def main(strargument):
    shutil.rmtree("./test")
    os.mkdir("./test")
    os.remove("result.txt")
    f = open("result.txt", "a")
    cap = cv2.VideoCapture(strargument)
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap = cv2.VideoCapture("NTQ.mkv")
    #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi")
    #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg")
    # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview")
    success, frame = cap.read()
    startId = countIdFolder("./face_db/")
    # quit if unable to read the video file
    if not success:
        print('Failed to read video')
        sys.exit(1)
    #The color of the rectangle we draw around the face
    rectangleColor = (0, 165, 255)
    #variables holding the current frame number and the current faceid
    frameCounter = 0
    currentFaceID = 0
    #Variables holding the correlation trackers and the name per faceid
    conf = configparser.ConfigParser()
    conf.read("config/main.cfg")
    mtcnn_detector = load_mtcnn(conf)
    MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH")
    VERIFICATION_THRESHOLD = float(
        conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD"))
    FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH")
    BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH"))
    MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE"))
    MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL"))
    YAWL = int(conf.get("CUSTOM", "YAWL"))
    YAWR = int(conf.get("CUSTOM", "YAWR"))
    PITCHL = int(conf.get("CUSTOM", "PITCHL"))
    PITCHR = int(conf.get("CUSTOM", "PITCHR"))
    ROLLL = int(conf.get("CUSTOM", "ROLLL"))
    ROLLR = int(conf.get("CUSTOM", "ROLLR"))
    MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED"))
    IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH"))
    EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y"))
    EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X"))
    SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH"))
    MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN"))
    MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE"))
    LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME"))
    ROIXL = int(conf.get("CUSTOM", "ROIXL"))
    ROIXR = int(conf.get("CUSTOM", "ROIXR"))
    ROIYB = int(conf.get("CUSTOM", "ROIYB"))
    ROIYA = int(conf.get("CUSTOM", "ROIYA"))
    maxDisappeared = MAXDISAPPEARED  ## khong xuat hien toi da 100 frame
    faces_db = load_faces(FACE_DB_PATH, mtcnn_detector)
    # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector)
    time.sleep(10)
    for item in faces_db:
        print(item["name"])
    listTrackedFace = []
    mark_detector = MarkDetector()
    tm = cv2.TickMeter()
    _, sample_frame = cap.read()
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    with tf.Graph().as_default():
        with tf.Session() as sess:
            load_mobilefacenet(MODEL_PATH)
            inputs_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            try:
                start = time.time()
                while True:
                    start1 = time.time()
                    retval, frame = cap.read()

                    #Increase the framecounter
                    frameCounter += 1
                    if retval:
                        _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR]
                        cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB),
                                      (0, 0, 255), 2)
                        good_face_index = []
                        # faces_db = load_face_db.face_db
                        if (frameCounter % 1) == 0:
                            ### embed and compare name
                            for i, face_db in enumerate(faces_db):
                                if not os.path.isdir(
                                        "./face_db/" +
                                        face_db["name"].split("_")[0]):
                                    faces_db.pop(i)
                            faces, landmarks = mtcnn_detector.detect(_frame)
                            if faces.shape[0] is not 0:
                                input_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                save_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                (yaw, pitch, roll) = (0, 0, 0)
                                for i, face in enumerate(faces):
                                    if round(faces[i, 4], 6) > IS_FACE_THRESH:
                                        bbox = faces[i, 0:4]
                                        points = landmarks[i, :].reshape(
                                            (5, 2))
                                        nimg = face_preprocess.preprocess(
                                            _frame,
                                            bbox,
                                            points,
                                            image_size='112,112')
                                        save_images[i, :] = nimg
                                        nimg = nimg - 127.5
                                        nimg = nimg * 0.0078125
                                        input_images[i, :] = nimg
                                        (x1, y1, x2, y2) = bbox.astype("int")
                                        if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2:
                                            continue
                                        temp = int((y2 - y1) / EXTEND_Y)
                                        y1 = y1 + temp
                                        y2 = y2 + temp
                                        temp = int((x2 - x1) / EXTEND_X)
                                        if x1 > temp:
                                            x1 = x1 - temp
                                        x2 = x2 + temp
                                        # cv2.imshow("mainframe",frame)
                                        # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2])
                                        face_img = cv2.resize(
                                            _frame[y1:y2, x1:x2], (128, 128))
                                        # cv2.imshow("ok",face_img)
                                        face_img = cv2.cvtColor(
                                            face_img, cv2.COLOR_BGR2RGB)
                                        tm.start()
                                        marks = mark_detector.detect_marks(
                                            [face_img])
                                        tm.stop()
                                        marks *= (x2 - x1)
                                        marks[:, 0] += x1
                                        marks[:, 1] += y1
                                        # mark_detector.draw_marks(
                                        #         frame, marks, color=(0, 255, 0))
                                        pose, (
                                            yaw, pitch, roll
                                        ) = pose_estimator.solve_pose_by_68_points(
                                            marks)
                                        # temp = frame
                                        # cv2.putText(temp,"yaw:  "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"roll:   "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.imshow("frame",temp)
                                        # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR):
                                        #     good_face_index.append(i)
                                        # cv2.waitKey(0)
                                # print(good_face_index)
                                feed_dict = {inputs_placeholder: input_images}
                                emb_arrays = sess.run(embeddings,
                                                      feed_dict=feed_dict)
                                emb_arrays = sklearn.preprocessing.normalize(
                                    emb_arrays)
                                names = []
                                sims = []
                                for i, embedding in enumerate(emb_arrays):
                                    # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False:
                                    #     names.append(listTrackedFace[i].name)
                                    #     continue
                                    embedding = embedding.flatten()
                                    temp_dict = {}
                                    for com_face in faces_db:
                                        ret, sim = feature_compare(
                                            embedding, com_face["feature"],
                                            0.65)
                                        temp_dict[com_face["name"]] = sim
                                    # print(temp_dict)
                                    dictResult = sorted(temp_dict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    # print(dictResult[:5])
                                    name = ""
                                    if len(dictResult) > 0 and dictResult[0][
                                            1] > VERIFICATION_THRESHOLD:
                                        name = dictResult[0][
                                            0]  #.split("_")[0]
                                        sim = dictResult[0][1]
                                        ## wite log
                                        t = time.time()
                                        f.write(name + "___" +
                                                str((t - start) // 60) + ":" +
                                                str(int(t - start) % 60) +
                                                "\n")
                                    else:
                                        name = "unknown"
                                        sim = 0
                                    names.append(name)
                                    sims.append(sim)

                                    # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:])
                                    # if len(dictResult)>0 :
                                    # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:])
                                    ################################ tracker
                                for i, embedding in enumerate(emb_arrays):
                                    embedding = embedding.flatten()
                                    ResultDict = {}
                                    for objectTrackFace in listTrackedFace:
                                        tempList = []
                                        (x1, y1, x2,
                                         y2) = objectTrackFace.latestBox
                                        for com_face in objectTrackFace.listEmbedding:
                                            ret, sim = feature_compare(
                                                embedding, com_face, 0.65)
                                            tempList.append(sim)
                                        tempList.sort(reverse=True)
                                        if len(tempList) > 0:
                                            if tempList[0] > 0.9 or (
                                                    similarIOU(
                                                        faces[i, :4].astype(
                                                            "int"),
                                                        objectTrackFace.
                                                        latestBox) and
                                                (frameCounter - objectTrackFace
                                                 .latestFrameCounter) < 3):
                                                ResultDict[objectTrackFace.
                                                           name] = tempList[0]
                                    dictResult = sorted(ResultDict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    if True:
                                        if len(
                                                ResultDict
                                        ) > 0 and dictResult[0][
                                                1] > SIMILAR_THRESH:  ## neu khop -- 0.5
                                            # for ik in range(len(dict)):
                                            #     if dict[ik][1]>SIMILAR_THRESH:

                                            nameTrackCurrent = dictResult[0][0]
                                            for index, tempFaceTrack in enumerate(
                                                    listTrackedFace):
                                                if tempFaceTrack.name == nameTrackCurrent:
                                                    if len(tempFaceTrack.
                                                           listImage
                                                           ) > MAX_LIST_LEN:
                                                        tempFaceTrack.listImage.pop(
                                                            0)
                                                        tempFaceTrack.listEmbedding.pop(
                                                            0)
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    else:
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    if names[i] != "unknown":
                                                        if RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            tempFaceTrack.name = names[
                                                                i]
                                                        # else: #################
                                                        #     names[i] = nameTrackCurrent
                                                    else:
                                                        if not RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            names[
                                                                i] = nameTrackCurrent
                                                    tempFaceTrack.countDisappeared = 0
                                                    tempFaceTrack.latestBox = faces[
                                                        i, 0:4].astype("int")
                                                    tempFaceTrack.latestFrameCounter = frameCounter
                                                    tempFaceTrack.liveTime = 0
                                                    tempFaceTrack.justAdded = True  ## but we still action with it
                                                    break

                                        else:  ## neu khong khop thi tao moi nhung chi them anh khi mat du tot
                                            if len(ResultDict) > 0:
                                                print(dictResult[0][1])
                                            if names[i] != "unknown":
                                                newTrackFace = trackedFace(
                                                    names[i])
                                            else:
                                                newTrackFace = trackedFace(
                                                    str(currentFaceID))
                                                currentFaceID = currentFaceID + 1
                                            if measureGoodFace(
                                                    MIN_FACE_SIZE,
                                                    MAX_BLACK_PIXEL,
                                                    save_images[i, :], yaw,
                                                    pitch, roll, BLUR_THRESH,
                                                    YAWL, YAWR, PITCHL, PITCHR,
                                                    ROLLL, ROLLR):
                                                newTrackFace.listImage.append(
                                                    save_images[i, :])
                                                newTrackFace.listEmbedding.append(
                                                    emb_arrays[i])
                                            newTrackFace.latestBox = faces[
                                                i, 0:4].astype("int")
                                            newTrackFace.latestFrameCounter = frameCounter
                                            # print(newTrackFace.latestBox)
                                            newTrackFace.justAdded = True
                                            listTrackedFace.append(
                                                newTrackFace)  ## add list
                                ### disappeared
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    if trackFace.justAdded == False:
                                        trackFace.countDisappeared = trackFace.countDisappeared + 1
                                        trackFace.liveTime = trackFace.liveTime + 1
                                    else:
                                        trackFace.justAdded = False
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## neu chua duoc it nhat 5 mat thi xoa luon
                                            trackedFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                                for i, face in enumerate(faces):
                                    x1, y1, x2, y2 = faces[i][0], faces[i][
                                        1], faces[i][2], faces[i][3]
                                    x1 = max(int(x1), 0)
                                    y1 = max(int(y1), 0)
                                    x2 = min(int(x2), _frame.shape[1])
                                    y2 = min(int(y2), _frame.shape[0])
                                    cv2.rectangle(frame,
                                                  (x1 + ROIXL, y1 + ROIYA),
                                                  (x2 + ROIXL, y2 + ROIYA),
                                                  (0, 255, 0), 2)
                                    # if i in good_face_index:
                                    # if not RepresentsInt(names[i]):
                                    cv2.putText(frame, names[i].split("_")[0],
                                                (int(x1 / 2 + x2 / 2 + ROIXL),
                                                 int(y1 + ROIYA)),
                                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                (255, 255, 255), 2)
                            else:
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    trackFace.countDisappeared = trackFace.countDisappeared + 1
                                    trackFace.liveTime = trackFace.liveTime + 1
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                        continue
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## neu chua duoc it nhat 5 mat thi xoa luon
                                            trackedFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                            end = time.time()
                            cv2.putText(frame,
                                        "FPS: " + str(1 // (end - start1)),
                                        (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 0), 3)
                            cv2.putText(
                                frame, "Time: " + str((end - start) // 60) +
                                ":" + str(int(end - start) % 60), (200, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
                        cv2.imshow("frame", frame)
                        key = cv2.waitKey(30)
                        if key & 0xFF == ord('q'):
                            break
                        if key == 32:
                            cv2.waitKey(0)
                    else:
                        break
            except KeyboardInterrupt as e:
                pass
    gc.collect()
    cv2.destroyAllWindows()
    exit(0)
def Image_Database_Generator():
    mark_detector = MarkDetector()
    video_capture = cv2.VideoCapture(file)
    name = input("Enter student id:")
    directory = os.path.join(facepath, name)

    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok='True')

    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok='True')
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    number_of_images = 0
    MAX_NUMBER_OF_IMAGES = 50
    count = 0
    count = 0
    while number_of_images < MAX_NUMBER_OF_IMAGES:
        ret, frame = video_capture.read()
        count += 1
        if ret == False:
            break
        if count % 5 != 0:
            continue

        frame = cv2.flip(frame, 1)
        height, width, _ = frame.shape
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        original_frame = frame.copy()

        height, width = frame.shape[:2]

        shape_predictor = dlib.shape_predictor(
            "shape_predictor_68_face_landmarks.dat")
        face_aligner = FaceAligner(shape_predictor,
                                   desiredFaceWidth=FACE_WIDTH)

        facebox = mark_detector.extract_cnn_facebox(frame)

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            x1 = max(facebox[0] - 0, 0)
            x2 = min(facebox[2] + 0, width)
            y1 = max(facebox[1] - 0, 0)
            y2 = min(facebox[3] + 0, height)

            face = frame[y1:y2, x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            marks = mark_detector.detect_marks([face_img])

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Show preview.
            if showit:
                frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0),
                                      2)
                cv2.imshow("Preview", frame)

            face = dlib.rectangle(x1, y1, x2, y2)
            face_aligned = face_aligner.align(original_frame, frame_gray, face)
            cv2.imwrite(
                os.path.join(directory,
                             str(name) + '_' + str(number_of_images) + '.jpg'),
                face_aligned)

            number_of_images += 1
            print(number_of_images)

        if cv2.waitKey(10) == 27:
            break
    print('Thank you')
    video_capture.release()
    cv2.destroyAllWindows()
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            marks = np.array([[218.4677, 89.87073], [220.22453, 121.96762],
示例#5
0
class PoseHeadNode(object):

    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        #self.pub_headpose_info = rospy.Publisher("/headpose_info", HeadPoseInfo, \
        #                        queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()
        
    
    def init(self):
        
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
                state_num=2,
                measure_num=1,
                cov_process=0.1,
                cov_measure=0.1) for _ in range(6)]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #    
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))
    
    def pose_cb(self, data):
        
        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)
    
    def draw_pose(self, img, humans):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)

    def color_callback(self, data): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480,640, 3))
        print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def publish_headpose_info(self, viz=True):
        
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480*2, 640*2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes, landmark = self.detector.detect(frame, \
                thresh, scales=scales, do_flip=False)
        mess = "Not detect pose"
        
        if faceboxes is not None and len(faceboxes) > 0:
            #if isinstance(faceboxes[1], int):
            #    faceboxes = [faceboxes]
            for facebox in faceboxes:
                facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                                facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                self.mark_detector.draw_marks(
                     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess=round(1/(t2-t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
                
                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
示例#6
0
def main():

    ''' main function'''

    img_dir = "img_dir/"     # image file dir

    # setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_list = []
    for filename in os.listdir(img_dir):
        frame = cv2.imread(img_dir + filename)
        img_list.append(frame)
        img_queue.put(frame)
    box_process = Process(target=get_face, args=(img_queue, box_queue))
    box_process.start()

    # introduce mark_detector to detect landmarks
    mark_detector = MarkDetector()

    while True:
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # flipcode > 0: horizontal flip; flipcode = 0: vertical flip; flipcode < 0: horizental and vertical flip
        # frame = cv2.flip(frame, 2)

        if len(img_list) != 0:
            frame = img_list.pop(0)
            raw_frame = frame.copy()
            # introduce pos estimator to solve pose. Get one frames to setup the
            # estimator according to the images size.
            height, width = frame.shape[:2]
            pose_estimator = PoseEstimator(img_size=(height, width))
        else:
            break

        # Pose estimation by 3 steps.
        # 1. detect faces.
        # 2. detect landmarks.
        # 3. estimate pose.

        # get face from box queue.
        faceboxes = box_queue.get()
        # print("the length of facebox is " + str(len(faceboxes)))
        for facebox in faceboxes:
            # detect landmarks from image 128 * 128
            face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]    # cut off the area of face.

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)    # BGR -> RGB
            marks = mark_detector.detect_marks(face_img)

            # covert the marks locations from local CNN to global image.
            marks[:, 0] *= (facebox[2] - facebox[0]) # the width of rectangle.
            marks[:, 1] *=  (facebox[3] - facebox[1])  # the length of rectangle.
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # uncomment following line to show raw marks.
            # util.draw_marks(frame, marks, color=(0, 255, 0))
            # util.draw_faceboxes(frame, facebox)

            # try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
           
            # pose[0] is rotation_vector, and pose[1] is translation_vector
            
            pose_np = np.array(pose).flatten()
            pose = np.reshape(pose_np, (-1, 3))

            angle = util.get_angle(pose[0])
            pose_estimator.get_warp_affined_image(frame, facebox, marks, angle[2])

        # show preview
        cv2.imshow("Preview", frame)
        cv2.imshow("raw_frame", raw_frame)
        if cv2.waitKey(0) == 27:
            continue

    # clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#7
0
def main():

    # Initiate Class for Face & Mark Detector
    face_detector = FaceDetector()
    mark_detector = MarkDetector()

    # Landmark 3D for projection and landmark 2d index of corresponding mark
    landmarks_3d = np.array(
        [
            [0.000000, 0.000000, 6.763430],  # 33 nose bottom edge
            [6.825897, 6.760612, 4.402142],  # 17 left brow left corner
            [1.330353, 7.122144, 6.903745],  # 21 left brow right corner
            [-1.330353, 7.122144, 6.903745],  # 22 right brow left corner
            [-6.825897, 6.760612, 4.402142],  # 26 right brow right corner
            [5.311432, 5.485328, 3.987654],  # 36 left eye left corner
            [1.789930, 5.393625, 4.413414],  # 39 left eye right corner
            [-1.789930, 5.393625, 4.413414],  # 42 right eye left corner
            [-5.311432, 5.485328, 3.987654],  # 45 right eye right corner
            [2.005628, 1.409845, 6.165652],  # 31 nose left corner
            [-2.005628, 1.409845, 6.165652],  # 35 nose right corner
            [2.774015, -2.080775, 5.048531],  # 48 mouth left corner
            [-2.774015, -2.080775, 5.048531],  # 54 mouth right corner
            [0.000000, -3.116408, 6.097667],  # 57 mouth central bottom corner
            [0.000000, -7.415691, 4.070434]  # 8 chin corner
        ],
        dtype=np.double)
    lm_2d_index = [33, 17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8]

    # Define color for facebox
    color = (244, 134, 66)

    # Initiate Video Streaming
    file_name = 'assets/trump.gif'
    vs = FileVideoStream(file_name).start()
    time.sleep(2.0)

    frame = vs.read()

    while frame is not None:
        frame = imutils.resize(frame, width=800, height=600)
        (H, W) = frame.shape[:2]

        faceboxes = face_detector.extract_square_facebox(frame)

        if faceboxes is not None:
            for facebox in faceboxes:
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]

                marks = mark_detector.detect_marks(face_img)
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                rvec, tvec, cm, dc = get_headpose(h=H,
                                                  w=W,
                                                  landmarks_2d=marks,
                                                  landmarks_3d=landmarks_3d,
                                                  lm_2d_index=lm_2d_index)

                draw_front_box(frame, color, rvec, tvec, cm, dc)

        cv2.imshow("3D Face Box", frame)
        # writer.write(frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
        if key == ord("w"):
            frame = vs.read()
def Camera_Capture():
    mark_detector = MarkDetector()
    name = input("Enter student id:")
    directory = os.path.join(facepath, name)
    
    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok = 'True')
    
    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok = 'True')
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    poses=['frontal','right','left','up','down']
    file=0
    cap = cv2.VideoCapture(file)
    
    ret, sample_frame = cap.read()
    i = 0
    count = 0
    if ret==False:
        return    
        
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    
    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]
    images_saved_per_pose=0
    number_of_images = 0
    
    shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH)
    while i<5:
        saveit = False
        # Read frame, crop it, flip it, suits your needs.
        ret, frame = cap.read()
        if ret is False:
            break
        if count % 5 !=0: # skip 5 frames
            count+=1
            continue
        if images_saved_per_pose==IMAGE_PER_POSE:
            i+=1
            images_saved_per_pose=0

        # If frame comes from webcam, flip it so it looks like a mirror.
        if file == 0:
            frame = cv2.flip(frame, 2)
        original_frame=frame.copy()
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        facebox = mark_detector.extract_cnn_facebox(frame)
    
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            x1=max(facebox[0]-0,0)
            x2=min(facebox[2]+0,width)
            y1=max(facebox[1]-0,0)
            y2=min(facebox[3]+0,height)
            
            face = frame[y1: y2,x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
    
            marks = mark_detector.detect_marks([face_img])
    
            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]
        
            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
    
            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))
    
            # print(steady_pose[0][0])
            # if steady_pose[0][0]>0.1:
            #     print('right')
            # else: 
            #     if steady_pose[0][0]<-0.1:
            #         print('left')
            # if steady_pose[0][1]>0.1:
            #     print('down')
            # else: 
            #     if steady_pose[0][1]<-0.1:
            #         print('up')
            # print(steady_pose[0])
            if i==0:
                if abs(steady_pose[0][0])<ANGLE_THRESHOLD and abs(steady_pose[0][1])<ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True                    
            if i==1:
                if steady_pose[0][0]>ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==2:
                if steady_pose[0][0]<-ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==3:
                if steady_pose[0][1]<-ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==4:
                if steady_pose[0][1]>ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            # Show preview.
            if i>=5:
                print ('Thank you')
                break

            frame = cv2.putText(frame,poses[i] +' : '+ str(images_saved_per_pose)+'/'+str(IMAGE_PER_POSE),(10,50),cv2.FONT_HERSHEY_SIMPLEX,2,(255,255,255),1,cv2.LINE_AA)
            frame = cv2.rectangle(frame, (x1,y1), (x2,y2),(255,255,0),2)
            cv2.imshow("Preview", frame)

            if saveit:                        
                face = dlib.rectangle(x1, y1, x2, y2)
                face_aligned = face_aligner.align(original_frame, frame_gray, face)
                cv2.imwrite(os.path.join(directory, str(name)+'_'+str(number_of_images)+'.jpg'), face_aligned)
                print(images_saved_per_pose)
                number_of_images+=1

        if cv2.waitKey(10) == 27:
            break    
    cap.release()
    cv2.destroyAllWindows()
示例#9
0
class PoseHeadNode(object):
    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo, \
                                queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()

    def init(self):

        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(6)
        ]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):

        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x,
                                body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)

    def draw_pose(self, img, humans, parts=[4, 7]):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        image_h, image_w, _ = img.shape
        if parts is None or len(parts) == 0:
            return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        else:

            for human in humans:
                for part in parts:

                    if part in human.body_parts.keys():
                        body_part = human.body_parts[part]
                        coord = (int(body_part.x * image_w + 0.5), \
                                int(body_part.y * image_h + 0.5))
                        img = cv2.circle(img, coord, 2, (0, 255, 0))
            return img

    def color_callback(
        self, data
    ):  # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480, 640, 3))
        #print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def get_top_vertice_human(self, human, img_w, img_h):
        sorted_id = sorted(human["pose"].body_parts.keys())
        body_part = human["pose"].body_parts[sorted_id[0]]
        return (int(body_part.x * image_w + 0.5), \
                                int(body_part.y * image_h + 0.5))

    def is_gazing(self, face_direction, human, img):
        def get_angle_direct_with_peakhand(face_direction, peak):
            d0, d1 = face_direction
            peak_direction = (peak[0] - d0[0], peak[1] - d0[1])
            face_direct = (d1[0] - d0[0], d1[1] - d0[1])

            d_peak_direction = np.sqrt(peak_direction[0]**2 +
                                       peak_direction[1]**2)
            d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2)
            return (peak_direction[0]*face_direct[0] + \
                    peak_direction[1]*face_direct[1])/(d_peak_direction*\
                    d_face_direct)

        thresh = 0.1  #0.5 #cos(PI/3)

        image_h, image_w, _ = img.shape
        if 4 in human.body_parts.keys():
            body_part = human.body_parts[4]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle left = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        if 7 in human.body_parts.keys():
            body_part = human.body_parts[7]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle right = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        return False

    def get_gazing_status(self, face_directions, face_coords, humans, img):
        def get_dist_to_humans(human_coords, coord):
            result = []

            for _coord in human_coords:
                result.append(np.sqrt((coord[0]-_coord[0])**2) + \
                        (coord[1]-_coord[1])**2)
            return result

        image_h, image_w, _ = img.shape
        gazing_status = []

        human_coords = []

        for human in humans:

            sorted_id = sorted(human.body_parts.keys())
            body_part = human.body_parts[sorted_id[0]]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))

            human_coords.append(coord)

        for fdirection, fcoord in zip(face_directions, face_coords):
            dists = get_dist_to_humans(human_coords, fcoord)
            idx = np.argmin(dists)
            gazing_status.append(self.is_gazing(fdirection, humans[idx], img))
            del human_coords[idx]

        return gazing_status

    def publish_headpose_info(self, viz=True):

        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None or self.humans is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes = self.mark_detector.extract_cnn_facebox(frame)
        mess = "Not detect pose"
        face_directions = []
        face_coords = []

        if faceboxes is not None and len(faceboxes) > 0:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                #facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_coords.append((int((facebox[0]+facebox[2])/2), \
                                int((facebox[1]+facebox[3])/2)))
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                face_direction = self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
                face_directions.append(face_direction)
                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])
        gazing_status = self.get_gazing_status(face_directions, face_coords, \
                copy.deepcopy(self.humans), self.color_img)

        msg = GazingInfo()
        msg.is_gazing = []
        msg.coords = []

        for coord, gazestatus in zip(face_coords, gazing_status):
            _coord = Coord2D()
            _coord.x = coord[0]
            _coord.y = coord[1]
            msg.coords.append(_coord)
            msg.is_gazing.append(gazestatus)

        self.pub_headpose_info.publish(msg)

        print(gazing_status)
        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
示例#10
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        engine.say(
            "Warning: video source not assigned, default webcam will be used")
        engine.runAndWait()
        video_src = 0
    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    gaze = GazeTracking()
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()
    head_flag = 0
    gaze_flag = 0
    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]
        #audio_record(AUDIO_OUTPUT, 3)
        #sphinx_recog(AUDIO_OUTPUT)
        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()
        gaze.refresh(frame)
        frame = gaze.annotated_frame()
        text = ""
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            gray = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            rects = detector(gray, 0)
            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()
            #
            for rect in rects:
                # determine the facial landmarks for the face region, then
                # convert the facial landmark (x, y)-coordinates to a NumPy
                # array
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(
                    shape)  #converting to NumPy Array矩阵运算
                mouth = shape[Start:End]
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)  #眼睛长宽比
                rightEAR = eye_aspect_ratio(rightEye)
                ear = (leftEAR + rightEAR) / 2.0  #长宽比平均值
                lipdistance = lip_distance(shape)

                if (lipdistance > YAWN_THRESH):
                    #print(lipdistance)
                    flag0 += 1
                    print("yawning time: ", flag0)
                    if flag0 >= 40:
                        cv2.putText(frame, "Yawn Alert", (10, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        cv2.putText(frame, "Yawn Alert", (220, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        engine.say("don't yawn")
                        engine.runAndWait()
                        flag0 = 0
                else:
                    flag0 = 0

                if (ear < thresh):
                    flag += 1
                    print("eyes closing time: ", flag)
                    if flag >= frame_check:
                        cv2.putText(frame,
                                    "****************ALERT!****************",
                                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2)
                        cv2.putText(frame,
                                    "****************ALERT!****************",
                                    (10, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2)
                        engine.say("open your eyes")
                        engine.runAndWait()
                        flag = 0
                else:
                    flag = 0
                if gaze.is_right():
                    print("Looking right")
                    text = "Looking right"
                elif gaze.is_left():
                    print("Looking left")
                    text = "Looking left"
                elif gaze.is_up():
                    text = "Looking up"
                else:
                    text = "Looking center"
                if text is not "Looking center":
                    gaze_flag += 1
                    if gaze_flag >= 20:
                        engine.say("look forward")
                        engine.runAndWait()
                        gaze_flag = 0
                else:
                    gaze_flag = 0
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
            # get angles
            angles = pose_estimator.get_angles(pose[0], pose[1])
            if ((-8 > angles[0] or angles[0] > 8)
                    or (-8 > angles[1] or angles[1] > 8)):
                head_flag += 1
                if head_flag >= 40:
                    print(angles[0])
                    engine.say("please look ahead")
                    engine.runAndWait()
            else:
                head_flag = 0
            # pose_estimator.draw_info(frame, angles)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               pose[0],
                                               pose[1],
                                               color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
            #pose_estimator.show_3d_model

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main(video_src):
    """MAIN
    
    :param video_src: Source of video to analyze
    :type video_src: str or int"""
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    face_detector = FaceDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,))
    #box_process = Process(target=get_faces, args=(face_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    #face expression recognizer initialization
    from keras.models import model_from_json
    model = model_from_json(open("./model/facial_expression_model_structure.json", "r").read())
    model.load_weights('./model/facial_expression_model_weights.h5') #load weights

    #-----------------------------

    emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

    while True:
        input()
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.

        #facebox = box_queue.get()

        faceboxes = dump_queue(box_queue)
        print("{} FACEBOXES".format(len(faceboxes)))
        for facebox in faceboxes:
            if min(facebox) < 0:
                continue
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]

            if not face_img.shape[0] or not face_img.shape[1]:
                continue

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            detected_face = frame[facebox[1]: facebox[3],facebox[0]: facebox[2]] #crop detected face
            detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY) #transform to gray scale
            detected_face = cv2.resize(detected_face, (48, 48)) #resize to 48x48



            # emotion estimation
            img_pixels = image.img_to_array(detected_face)
            img_pixels = np.expand_dims(img_pixels, axis = 0)

            img_pixels /= 255 #pixels are in scale of [0, 255]. normalize all pixels in scale of [0, 1]

            predictions = model.predict(img_pixels) #store probabilities of 7 expressions

            #find max indexed array 0: angry, 1:disgust, 2:fear, 3:happy, 4:sad, 5:surprise, 6:neutral
            max_index = np.argmax(predictions[0])

            emotion = emotions[max_index]

            #write emotion text above rectangle

            #V_FONT_HERSHEY_SIMPLEX normal size sans-serif font
            #CV_FONT_HERSHEY_PLAIN small size sans-serif font
            #CV_FONT_HERSHEY_DUPLEX normal size sans-serif font (more complex than CV_FONT_HERSHEY_SIMPLEX )
            #CV_FONT_HERSHEY_COMPLEX normal size serif font
            #CV_FONT_HERSHEY_TRIPLEX normal size serif font (more complex than CV_FONT_HERSHEY_COMPLEX )
            #CV_FONT_HERSHEY_COMPLEX_SMALL smaller version of CV_FONT_HERSHEY_COMPLEX
            #CV_FONT_HERSHEY_SCRIPT_SIMPLEX hand-writing style font
            #CV_FONT_HERSHEY_SCRIPT_COMPLEX more complex variant of CV_FONT_HERSHEY_SCRIPT_SIMPLEX

            image_text = ""
            for index in range(len(emotions)):
                if predictions[0][index]>0.3:
                    image_text += "{0} : {1} %\n".format(emotions[index], int(predictions[0][index]*100))

            space = 0
            for text in image_text.strip().split("\n"):
                cv2.putText(
                img = frame,
                text= text,
                org =(int(facebox[0]), int(facebox[1])-space),
                fontFace = cv2.FONT_HERSHEY_PLAIN,
                fontScale = 0.8,
                color   =  (255,255,255),
                thickness = 1
                )
                space += int(0.25*48)



            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
            #pose = pose_estimator.solve_pose(marks)


            # Stabilize the pose.
            #stabile_pose = []
            #pose_np = np.array(pose).flatten()
            #for value, ps_stb in zip(pose_np, pose_stabilizers):
            #    ps_stb.update([value])
            #    stabile_pose.append(ps_stb.state[0])
            #stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            pose_estimator.draw_annotation_box(
                 frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))


        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
示例#12
0
class HeadPoseEstimator():
    def __init__(self):
        print "OpenCV version: {}".format(cv2.__version__)
        self.CNN_INPUT_SIZE = 128

        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))

        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(
                                       self.mark_detector,
                                       self.img_queue,
                                       self.box_queue,
                                   ))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool,
            self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name",
                                            "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image,
                                               self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param(
            "show_annotation_box", True)
        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)

        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)

        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()

    def get_face(self, detector, img_queue, box_queue):
        """
        画像キューから顔を取り出します。
        処理はマルチプロセッシングで行います。
        """
        while not rospy.is_shutdown():
            image = img_queue.get()
            box = detector.extract_cnn_facebox(image)
            box_queue.put(box)

    def process_ctrl_callback(self, msg):
        self.process_ctrl_flag = msg.data

    def euler_to_quaternion(self, euler):
        # オイラー角からクォータニオンに変換
        # 鏡写しになるように値を調整
        q = tf.transformations.quaternion_from_euler(euler.x, euler.y, euler.z)
        return Quaternion(x=q[1], y=-q[2], z=-q[3], w=q[0])
        #return Quaternion(x=q[0], y=q[1], z=q[2], w=q[3])

    def img_callback(self, img_msg):
        if self.process_ctrl_flag == True:
            try:
                cv_img = CvBridge().imgmsg_to_cv2(img_msg, "bgr8")
                flip_frame = cv2.flip(cv_img, 2)  # 画像を鏡写しにする
                frame = cv_img

                img_height, img_width, _ = frame.shape[:3]

                self.img_queue.put(frame)

                facebox = self.box_queue.get()

                # 顔の検出に成功
                if facebox is not None:
                    # 画像から顔をトリミングしてCNNでlandmarksを見つける
                    # img[top : bottom, left : right]
                    face_img = frame[facebox[1]:facebox[3],
                                     facebox[0]:facebox[2]]
                    face_img = cv2.resize(
                        face_img, (self.CNN_INPUT_SIZE, self.CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    face_rect = FaceRect()
                    face_rect.x = facebox[0]
                    face_rect.y = facebox[1]
                    face_rect.width = facebox[2] - facebox[0]
                    face_rect.height = facebox[3] - facebox[1]

                    #face_rect.x = img_width - facebox[0] - (facebox[2] - facebox[0])
                    #face_rect.y = facebox[1]
                    #face_rect.width = facebox[2] - facebox[0]
                    #face_rect.height = facebox[3] - facebox[1]

                    marks = self.mark_detector.detect_marks([face_img])

                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Uncomment following line to show raw marks.
                    #self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                    # Uncomment following line to show facebox.
                    #self.mark_detector.draw_box(frame, [facebox])

                    pose = self.pose_estimator.solve_pose_by_68_points(marks)
                    #rotation = pose[0]
                    #euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    #quaternion = self.euler_to_quaternion(euler)

                    # stabilize the pose
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    rotation = steady_pose[0]
                    euler = Vector3(x=rotation[0],
                                    y=rotation[1],
                                    z=rotation[2])
                    head_rotation = self.euler_to_quaternion(euler)
                    #print head_rotation
                    #print "---"

                    head_pose = HeadPose()
                    head_pose.face_rect = face_rect
                    head_pose.head_rotation = head_rotation
                    self.result_publisher.publish(head_pose)

                    # Uncomment following line to draw pose annotation on frame.
                    #self.pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128))

                    # Uncomment following line to draw stabile pose annotation on frame.
                    if self.show_annotation_box_flag == True:
                        self.pose_estimator.draw_annotation_box(frame,
                                                                steady_pose[0],
                                                                steady_pose[1],
                                                                color=(128,
                                                                       255,
                                                                       128))

                    # Uncomment following line to draw head axes on frame.
                    # openCV3.4.0では動かない
                    #self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

                    if self.show_axis_flag == True:
                        self.pose_estimator.draw_axis(frame, steady_pose[0],
                                                      steady_pose[1])

                if self.show_result_img_flag == True:
                    cv2.imshow("result", frame)
                    cv2.waitKey(1)

                if self.pub_result_img_flag == True:
                    try:
                        pub_img = CvBridge().cv2_to_imgmsg(frame, "bgr8")
                        self.img_publisher.publish(pub_img)

                    except CvBridgeError, e:
                        rospy.logerr(str(e))

            except CvBridgeError, e:
                rospy.logerr(str(e))
示例#13
0
def run():
    # Load the parameters
    conf = config()

    # initialize dlib's face detector (HOG-based) and then create the
    # facial landmark predictor
    print("[INFO] loading facial landmark predictor...")
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(conf.shape_predictor_path)

    # grab the indexes of the facial landmarks for the left and
    # right eye, respectively
    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    # initialize the video stream and sleep for a bit, allowing the
    # camera sensor to warm up
    # cap = cv2.VideoCapture(conf.vedio_path)
    cap = cv2.VideoCapture(0)
    if conf.vedio_path == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()
    # sample_frame = imutils.rotate(sample_frame, 90)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()
    # Gaze tracking
    gaze = GazeTracking()

    # loop over the frames from the video stream
    temp_steady_pose = 0
    while True:
        # grab the frame from the threaded video stream, resize it to
        # have a maximum width of 400 pixels, and convert it to
        # grayscale
        frame_got, frame = cap.read()

        # Empty frame
        frame_empty = np.zeros(frame.shape)

        # frame = imutils.rotate(frame, 90)
        frame = imutils.resize(frame, width=400)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # detect faces in the grayscale frame
        rects = detector(gray, 0)

        # check to see if a face was detected, and if so, draw the total
        # number of faces on the frame
        if len(rects) > 0:
            text = "{} face(s) found".format(len(rects))
            # cv2.putText(frame, text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX,
            # 			0.5, (0, 0, 255), 2)

            # Empty frame
            cv2.putText(frame_empty, text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 0, 255), 2)

        # initialize the frame counters and the total number of blinks
        TOTAL = 0
        COUNTER = 0
        # loop over the face detections
        for (i, rect) in enumerate(rects):
            # determine the facial landmarks for the face region, then
            # convert the facial landmark (x, y)-coordinates to a NumPy
            # array
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)

            # ********************************
            # Blink detection
            # extract the left and right eye coordinates, then use the
            # coordinates to compute the eye aspect ratio for both eyes
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)

            # average the eye aspect ratio together for both eyes
            ear = (leftEAR + rightEAR) / 2.0

            # compute the convex hull for the left and right eye, then
            # visualize each of the eyes
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            # cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
            # cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)

            # Frame empty
            cv2.drawContours(frame_empty, [leftEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(frame_empty, [rightEyeHull], -1, (0, 255, 0), 1)

            # check to see if the eye aspect ratio is below the blink
            # threshold, and if so, increment the blink frame counter
            if ear < conf.EYE_AR_THRESH:
                COUNTER += 1

            # otherwise, the eye aspect ratio is not below the blink
            # threshold
            else:
                # if the eyes were closed for a sufficient number of
                # then increment the total number of blinks
                if COUNTER >= conf.EYE_AR_CONSEC_FRAMES:
                    TOTAL += 1

                # reset the eye frame counter
                COUNTER = 0

            # draw the total number of blinks on the frame along with
            # the computed eye aspect ratio for the frame
            # cv2.putText(frame, "Blinks: {}".format(TOTAL), (30, 60),
            # 			cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            # cv2.putText(frame, "EAR: {:.2f}".format(ear), (30, 90),
            # 			cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            # Frame empty
            cv2.putText(frame_empty, "Blinks: {}".format(TOTAL), (30, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
            cv2.putText(frame_empty, "EAR: {:.2f}".format(ear), (30, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
            # ********************************
            # convert dlib's rectangle to a OpenCV-style bounding box
            # [i.e., (x, y, w, h)], then draw the face bounding box
            (x, y, w, h) = face_utils.rect_to_bb(rect)
            # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            #
            # show the face number
            # cv2.putText(frame, "Face #{}".format(i + 1), (30, 120),
            # 			cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            # Frame empty
            cv2.rectangle(frame_empty, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # show the face number
            cv2.putText(frame_empty, "Face #{}".format(i + 1), (30, 120),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)

            # loop over the (x, y)-coordinates for the facial landmarks
            # and draw them on the image
            for (x, y) in shape:
                # cv2.circle(frame, (x, y), 1, (0, 255, 255), -1)
                cv2.circle(frame_empty, (x, y), 1, (0, 255, 255), -1)

        # **********************************************************
        if frame_got is False:
            break

        # If frame comes from webcam, flip it so it looks like a mirror.
        if conf.vedio_path == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img,
                                  (conf.CNN_INPUT_SIZE, conf.CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            # 	frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            # pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
            pose_estimator.draw_axes(frame_empty, steady_pose[0],
                                     steady_pose[1])
            print('steady pose vector: {}'.format(steady_pose[0],
                                                  steady_pose[1]))
        else:
            # cv2.putText(frame, "Signal loss", (200, 200),
            # 			cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.putText(frame_empty, "Signal loss", (200, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        # ******************************************************************
        # We send this frame to GazeTracking to analyze it
        gaze.refresh(frame)

        frame = gaze.annotated_frame()
        text = ""

        if gaze.is_blinking():
            text = "Blinking"
        elif gaze.is_right():
            text = "Looking right"
        elif gaze.is_left():
            text = "Looking left"
        elif gaze.is_center():
            text = "Looking center"

        cv2.putText(frame_empty, text, (250, 250), cv2.FONT_HERSHEY_DUPLEX,
                    0.5, (147, 58, 31), 2)

        left_pupil = gaze.pupil_left_coords()
        right_pupil = gaze.pupil_right_coords()
        cv2.putText(frame_empty, "Left pupil:  " + str(left_pupil), (250, 280),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1)
        cv2.putText(frame_empty, "Right pupil: " + str(right_pupil),
                    (250, 310), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1)

        # ********************************************************************
        # show the frame
        # cv2.imshow("Frame", frame)
        cv2.imshow("Frame", frame_empty)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # do a bit of cleanup
    cv2.destroyAllWindows()
    cap.stop()
示例#14
0
def process(input_key):
    if exists('pretrain/' + input_key):
        return

    # input_key = data['keys']
    input_file = get_video_file(input_key)

    print('Input file', input_file)

    stream = cv2.VideoCapture(input_file)

    mark_detector = MarkDetector()
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    frame = None
    count = 0
    frame_got = True

    target_folder = 'pretrain/{key}'.format(key=input_key)

    if not exists(target_folder):
        makedirs(target_folder)

    while frame_got:
        frame_got, frame = stream.read()
        if frame_got:
            # print('Frame', frame)
            facebox = mark_detector.extract_cnn_facebox(frame)
            # print('Facebox', facebox)

            if facebox:

                height, width = frame.shape[:2]
                pose_estimator = PoseEstimator(img_size=(height, width))
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img, (input_size, input_size))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = mark_detector.detect_marks(face_img)

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # print('Marks', marks)

                for mark in marks:
                    cv2.circle(frame, tuple(mark), 3, (255, 0, 0))

                # print('Marks Length', len(marks))

                pose = pose_estimator.solve_pose_by_68_points(marks)
                # print('Pose', pose)
                pose = [pose[0].tolist(), pose[1].tolist()]
                with open(
                        '{target_folder}/vecs.{count}.txt'.format(
                            count=count, target_folder=target_folder),
                        'w') as f:
                    f.write(json.dumps(pose))
                with open(
                        '{target_folder}/marks.{count}.txt'.format(
                            count=count, target_folder=target_folder),
                        'w') as f:
                    f.write(json.dumps(marks.tolist()))
                cv2.imwrite(
                    '{target_folder}/image.{count}.png'.format(
                        count=count, target_folder=target_folder), frame)
            count += 1
示例#15
0
def main():
    #bagreader = BagFileReader(args.video, 640,480,848,480,30,30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)
    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps,
                          (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:

        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if frame is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)

        mess = "Not detect pose"

        if faceboxes is not None:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(
                #     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                pose_estimator.draw_annotation_box(frame,
                                                   pose[0],
                                                   pose[1],
                                                   color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame,
                    "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0),
                    thickness=2)
        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main():
    instructions = 0
    print "--- Intention classification for communication between a human and a robot ---"
    if instructions == 1:
        print "First you will be required to present a facial expression before you will do a head movement."
        print "If done correctly these gestures will be detected by the robot and it will perform the desired task."
        raw_input("Press Enter to continue...")

    if instructions == 1:
        print "This is the module for facial expression recognition."
        print "This program can detect the emotions: Happy and Angry."
        print "The program will look for the expression for 3 seconds."
    raw_input("To proceed to Facial Expression Recognition press Enter...")
    predictExp = 0

    # Load Facial Expression Recognition trained model
    print "- Loading FER model... -"
    #faceExpModel = tf.keras.models.load_model("/home/bjornar/ML_models/FER/Good models(80+)/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5")
    faceExpModel = tf.keras.models.load_model(
        "/home/project/Bjorn/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5"
    )

    # Load Face Cascade for Face Detection
    print "- Loading Face Cascade for Face Detection... -"
    #cascPath = "/home/bjornar/MScDissertation/TrainingData/FaceDetection/haarcascade_frontalface_default.xml"
    cascPath = "/home/project/Bjorn/IntentionClassification-Repository/haarcascade_frontalface_default.xml"
    faceCascade = cv2.CascadeClassifier(cascPath)

    ## Initializing Head Movement variables
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    sample_frame = cv2.imread("sample_frame.png")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]

    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    headPoseDirection = 'emtpy'

    if camera == 'kinect':
        ## Initialize Kinect camera
        print "Initializing camera..."
        try:
            from pylibfreenect2 import OpenGLPacketPipeline
            pipeline = OpenGLPacketPipeline()
        except:
            try:
                from pylibfreenect2 import OpenCLPacketPipeline
                pipeline = OpenCLPacketPipeline()
            except:
                from pylibfreenect2 import CpuPacketPipeline
                pipeline = CpuPacketPipeline()
        #print("Packet pipeline:", type(pipeline).__name__)

        # Create and set logger
        #logger = createConsoleLogger(LoggerLevel.Debug)
        setGlobalLogger()

        fn = Freenect2()
        num_devices = fn.enumerateDevices()
        if num_devices == 0:
            print("- No device connected! -")
            sys.exit(1)

        serial = fn.getDeviceSerialNumber(0)
        device = fn.openDevice(serial, pipeline=pipeline)

        listener = SyncMultiFrameListener(FrameType.Color | FrameType.Ir
                                          | FrameType.Depth)

        # Register listeners
        device.setColorFrameListener(listener)
        device.setIrAndDepthFrameListener(listener)

        device.start()

        # NOTE: must be called after device.start()
        registration = Registration(device.getIrCameraParams(),
                                    device.getColorCameraParams())

    elif camera == 'webcam':
        #video_capture = cv2.VideoCapture(0)
        video_capture = cv2.VideoCapture(-1)

    elif camera == 'video':
        #video_capture = cv2.VideoCapture(0)
        video_capture = cv2.VideoCapture(
            "/home/project/Bjorn/SonyHandycamTest.mp4")

    ## Facial Expression Recognition variables
    FER_prediction = []
    FERclass = ''
    FERstart = 0
    classifyMoves = 0

    ## Head movement variables
    predictHeadMov = 3
    HeadMov = []
    HMCclass = ''
    detectedFaces = []
    mark = []

    notDone = 0
    progressStatus = [0, 0]

    while notDone in progressStatus:  # This waits for each module to finsih
        if camera == 'kinect':
            frames = listener.waitForNewFrame()

            color = frames["color"]
            color = color.asarray()
            color = cv2.resize(color, (int(873), int(491)))
            color = color[0:480, 150:790]
            color = np.delete(color, np.s_[3::], 2)

        elif camera == 'webcam':
            # Capture frame-by-frame
            ret, color = video_capture.read()

        elif camera == 'video':
            # Capture frame-by-frame
            ret, color = video_capture.read()

        ## Detect facial expression
        predictExpNums = [1, 2]
        if predictExp == 0:
            while predictExp not in predictExpNums:
                predictExp = int(
                    raw_input(
                        "\nPress 1 to detect Facial Expression or press 2 to do Head Movement classification."
                    ))
            if predictExp == 1:
                predictExp = 1
                print "------ Facial Expression Recognition ------"
            elif predictExp == 2:
                predictHeadMov = 0
                progressStatus[0] = 1

            FERstart = time.time()

        elif predictExp == 1:
            currentTime = time.time()
            if currentTime - FERstart < 10:
                FER_prediction.append(
                    facialExpressionRecogntion(color, faceExpModel,
                                               faceCascade))
            else:
                predictExp = 2
                FER_prediction = filter(None, FER_prediction)
                FERclass = mostCommon(FER_prediction)
                FERclass = FERclass[2:7]
                predictHeadMov = 0
                if FERclass == '':
                    print("Did not detect an expression, try again.")
                    predictExp = 0
                else:
                    progressStatus[0] = 1
                    print "Detected expression: " + str(FERclass)
                    print "Progress: FER DONE"
                # else:
                #     #cv2.imwrite("sample_frame.png", color)
                #     break

        ## Detect head movement class
        if predictHeadMov == 0:
            predictHeadMov = int(
                raw_input(
                    "\nPress 1 to do Head Movement classification or 2 to skip."
                ))
            if predictHeadMov == 1:
                predictHeadMov = 1
                print "------ Head Movement Classification ------"
                moveTime = int(
                    raw_input(
                        "How many seconds should I record your movement?"))
                #moveTime = 2
            else:
                predictHeadMov = 2
            timer = time.time()
            previousTime1 = timer
            previousTime2 = timer

        if predictHeadMov == 1:
            print color.shape
            color = color[0:480, 0:480]
            color = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
            cv2.imshow("", color)
            raw_input()
            print color.shape
            # Feed frame to image queue.
            img_queue.put(color)

            #pdb.set_trace()

            # Get face from box queue.
            facebox = box_queue.get()
            print color.shape

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = color[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = mark_detector.detect_marks(face_img)

                #Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(color, headPoseDirection, (20, 70), font, 1,
                            (0, 255, 0), 4)

                # # Get average position of nose
                noseMarksTemp = []
                noseMarksTemp.append(marks[30][0])
                noseMarksTemp.append(marks[30][1])
                noseMarks[0] = noseMarksTemp

                for i in range(9, 0, -1):
                    noseMarks[i] = noseMarks[i - 1]

                # Get the direction of head movement
                headPoseDirection = calculateDirection(noseMarks)

                directionArray.append(headPoseDirection)

                if classifyMoves == 0:
                    classifyMoves = 1
                    timer = time.time()
                    previousTime1 = timer
                    previousTime2 = timer

                currentTime = time.time()
                if currentTime - previousTime1 > moveTime and classifyMoves == 1:
                    print "------------------------------------------------"
                    print "Elapsed timer 1: " + str(currentTime -
                                                    previousTime1)

                    # Get most common direction
                    HMCclass = mostCommon(directionArray)

                    classifyMoves = 2
                    directionArray = []
                    previousTime1 = currentTime

                    progressStatus[1] = 1
                    print "Progress: HMC DONE"
            else:
                print "Did not detect a face"
        elif predictHeadMov == 2:
            progressStatus[1] = 1
            print "Skipped Head Movement Estimation."
            break

        # if notDone in progressStatus and predictHeadMov == 2 and predictExp == 2:
        #     print "You seem to have skipped one or more tasks."
        #     inpt = ''
        #     while inpt == '':
        #         inpt = raw_input("To do FER press 1 and to do HMC press 2...")
        #         if input == '1':
        #             predictExp = 1
        #         elif input == '2':
        #             predictHeadMov

        cv2.imshow("", color)
        if camera == 'kinect':
            listener.release(frames)

        key = cv2.waitKey(delay=1)
        if key == ord('q'):
            break

    if camera == 'kinect':
        listener.release(frames)
        device.stop()
        device.close()
    elif camera == 'webcam' or camera == 'video':
        video_capture.release()

    cv2.destroyAllWindows()

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    print "---------------- RESULT ----------------"

    if FERclass != '':
        print "Detected facial expression: " + str(FERclass)
    else:
        print "Did not detect any expression."

    if HMCclass != '':
        print "Detected head movement: " + str(HMCclass)
    else:
        print "Did not detect a head movement."

    print "----------------------------------------"

    intentionClassification = [FERclass, HMCclass]

    return intentionClassification
示例#17
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
            right_corner = tuple([int(i) for i in marks[36]])
            left_corner = tuple([int(i) for i in marks[45]])
            # print(marks[36], marks[45])
            cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2)

            pixel_distance = int(
                math.sqrt((right_corner[0] - left_corner[0])**2 +
                          (right_corner[1] - left_corner[1])**2))
            estimated_distance = (real_width * focal_length) / pixel_distance

            cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            print(steady_pose[1])
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            angles = process_eyes(frame, marks)
            if bool(angles) is True:
                # print(angles)
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def headPoseEstimation():
    print("HEAD POSE ESTIMATION...")
    """MAIN"""
    # Video source from webcam or video file.
    #video_src = 0
    #video_src = 'EWSN.avi'
    #cam = cv2.VideoCapture(video_src)
    #_, sample_frame = cam.read()

    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    print("1")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    print("2")
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    print("3")
    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    # start = time.time()
    # previousTime1 = start
    # previousTime2 = start
    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    headPoseDirection = 'emtpy'

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break
        #print("4")
        #print(frame.shape)
        #print("5")
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        #if video_src == 0:
        #    print("6")
        #    frame = cv2.flip(frame, 2)
        #    cv2.imwrite("Preview.png", frame)
        #    print("7")

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)
        #print("8")
        # Get face from box queue.
        facebox = box_queue.get()
        #print(type(facebox)
        #print(facebox
        #print("9")
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            print(face_img.shape)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # # Get average position of nose
            noseMarksTemp = []
            noseMarksTemp.append(marks[30][0])
            noseMarksTemp.append(marks[30][1])
            noseMarks[0] = noseMarksTemp

            for i in range(9, 0, -1):
                noseMarks[i] = noseMarks[i - 1]

            # Get the direction of head movement
            headPoseDirection = calculateDirection(noseMarks)
            #if headPoseDirection != 'still':
            directionArray.append(headPoseDirection)
            #print(directionArray
            #print(len(directionArray)
            print("To capture a movement press 'a' and perform a movement.")
            #currentTime1 = time.time()
            if cv2.waitKey(5) == ord('a') and not classifyMoves:
                classifyMoves = 1
                print("Start classifying movement...")
                timer = time.time()
                currentTime = timer
                previousTime1 = timer
                previousTime2 = timer

            if cv2.waitKey(5) == ord('b') and classifyMoves:
                classifyMoves = 0
                print("Stopped classifying movement...")

            currentTime = time.time()
            if currentTime - previousTime1 > 2 and classifyMoves:
                print("------------------------------------------------")
                print("Elapsed timer 1: " + str(currentTime - previousTime1))
                #print(len(directionArray)

                # Get most common direction
                moveClass = mostCommon(directionArray)
                #moveSequence.append(moveClass)
                print(moveClass)

                # Get a sequence of head movements
                # if currentTime - previousTime2 > 10 and classifyMoves == 1 and len(moves) == 0:
                #     print("Elapsed timer 2: " + str(currentTime - previousTime2)
                #     numMoves = len(moveSequence)
                #     moves = moveSequence[(numMoves-5):(numMoves-1)]
                #     print(moves
                #     moveSequence = []
                #     previousTime2 = currentTime
                #     classifyMoves = 0
                classifyMoves = 0
                directionArray = []
                previousTime1 = currentTime

                print(
                    "To continue type 'c' or to recapture a movement type 'a'."
                )

            if cv2.waitKey(5) == ord('c'):
                break

            #print(previousTime
            # Uncomment following line to show raw marks.
            #mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # if len(moves) > 1:
        #     cv2.putText(frame, moves[0], (450,70), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[1], (450,100), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[2], (450,130), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[3], (450,160), font, 1, (0,0,0), 4)

        cv2.putText(frame, headPoseDirection, (20, 70), font, 1, (0, 255, 0),
                    4)

        # Show preview.
        #cv2.namedWindow("", cv2.WINDOW_NORMAL)
        cv2.imshow("Preview", frame)
        #cv2.resizeWindow("preview", 5000,5000)
        if cv2.waitKey(5) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    return moveClass
示例#19
0
def main():
    """MAIN"""
    cv2.namedWindow("Test")  # Create a named window
    cv2.moveWindow("Test", 900, 600)  # Move it to (40,30)

    screenWidth, screenHeight = pyautogui.size()
    st = 'Last command'

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Setting up process for listening to audio commands
    voice_command_queue = Q()
    stt_process = Thread(target=get_voice_command,
                         args=(voice_command_queue, ))
    stt_process.setDaemon(True)
    stt_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(255, 128, 128))

            # Uncomment following line to draw head axes on frame.
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])

            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]

            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)

            # print(xpos, ypos)

            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():

                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'

                print(command)

        cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)
        scale_percent = 30
        # calculate the 50 percent of original dimensions
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)

        # dsize
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)

        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    if args.out != None:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    tm = cv2.TickMeter()

    cnt = 0

    input_path = args.input_path
    listdir = os.listdir(input_path)
    for v_name in listdir:
        v_path = os.path.join(input_path, v_name)
        cap = cv2.VideoCapture(v_path)

        while True:
            # Read frame, crop it, flip it, suits your needs.
            frame_got, frame = cap.read()
            if frame_got is False:
                break

            # Crop it if frame is larger than expected.
            # frame = frame[0:480, 300:940]

            # If frame comes from webcam, flip it so it looks like a mirror.
            if video_src == 0:
                frame = cv2.flip(frame, 2)

            # Pose estimation by 3 steps:
            # 1. detect face;
            # 2. detect landmarks;
            # 3. estimate pose

            # Feed frame to image queue.
            img_queue.put(frame)

            # Get face from box queue.
            facebox = box_queue.get()

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                           facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks(face_img)
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                pose_estimator.draw_annotation_box(
                    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Show preview.
            # cv2.imshow("Preview", frame)
            # if cv2.waitKey(10) == 27:
            #     break
            if args.out != None:
                output_movie.write(frame)
            else:
                cv2.imshow("Preview", frame)

            cnt = cnt + 1
            if cnt % 100 == 0:
                print(str(cnt), flush=True)

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            pose_estimator.draw_annotation_box(
                frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main():
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # fix facebox if needed
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
示例#23
0
    def cameraCap(self):
        self.uId = self.userId.get()
        if self.uId != '':
            if self.outputLabel != None:
                self.outputLabel.destroy()

            self.outputLabel = Label(self.framePic, text="Here We Start")
            self.outputLabel.config(font=("Courier", 44))
            self.outputLabel.place(x=400, y=100)

            mark_detector = MarkDetector()
            name = self.uId
            directory = os.path.join(facepath, name)

            if not os.path.exists(facepath):
                os.makedirs(facepath, exist_ok='True')

            if not os.path.exists(directory):
                try:
                    os.makedirs(directory, exist_ok='True')
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        print('invalid student id or access denied')
                        return

            poses = ['frontal', 'right', 'left', 'up', 'down']
            file = 0

            ret, sample_frame = self.cap.read()
            i = 0
            count = 0
            if ret == False:
                return

            # Introduce pose estimator to solve pose. Get one frame to setup the
            # estimator according to the image size.
            height, width = sample_frame.shape[:2]
            pose_estimator = PoseEstimator(img_size=(height, width))

            # Introduce scalar stabilizers for pose.
            pose_stabilizers = [
                Stabilizer(state_num=2,
                           measure_num=1,
                           cov_process=0.1,
                           cov_measure=0.1) for _ in range(6)
            ]
            images_saved_per_pose = 0
            number_of_images = 0

            shape_predictor = dlib.shape_predictor(
                "shape_predictor_68_face_landmarks.dat")
            face_aligner = FaceAligner(shape_predictor,
                                       desiredFaceWidth=FACE_WIDTH)
            while i < 5:
                saveit = False
                # Read frame, crop it, flip it, suits your needs.
                ret, frame = self.cap.read()
                if ret is False:
                    break
                if count % 5 != 0:  # skip 5 frames
                    count += 1
                    continue
                if images_saved_per_pose == IMAGE_PER_POSE:
                    i += 1
                    images_saved_per_pose = 0

                # If frame comes from webcam, flip it so it looks like a mirror.
                if file == 0:
                    frame = cv2.flip(frame, 2)
                original_frame = frame.copy()
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                facebox = mark_detector.extract_cnn_facebox(frame)

                if facebox is not None:
                    # Detect landmarks from image of 128x128.
                    x1 = max(facebox[0] - 0, 0)
                    x2 = min(facebox[2] + 0, width)
                    y1 = max(facebox[1] - 0, 0)
                    y2 = min(facebox[3] + 0, height)

                    face = frame[y1:y2, x1:x2]
                    face_img = cv2.resize(face,
                                          (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    marks = mark_detector.detect_marks([face_img])

                    # Convert the marks locations from local CNN to global image.
                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Try pose estimation with 68 points.
                    pose = pose_estimator.solve_pose_by_68_points(marks)

                    # Stabilize the pose.
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    # print(steady_pose[0][0])
                    # if steady_pose[0][0]>0.1:
                    #     print('right')
                    # else:
                    #     if steady_pose[0][0]<-0.1:
                    #         print('left')
                    # if steady_pose[0][1]>0.1:
                    #     print('down')
                    # else:
                    #     if steady_pose[0][1]<-0.1:
                    #         print('up')
                    # print(steady_pose[0])
                    if i == 0:
                        if abs(steady_pose[0][0]) < ANGLE_THRESHOLD and abs(
                                steady_pose[0][1]) < ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 1:
                        if steady_pose[0][0] > ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 2:
                        if steady_pose[0][0] < -ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 3:
                        if steady_pose[0][1] < -ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 4:
                        if steady_pose[0][1] > ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    # Show preview.
                    if i >= 5:
                        print('Thank you')
                        if self.outputLabel != None:
                            self.outputLabel.destroy()

                        self.outputLabel = Label(self.framePic, text="Thanks")
                        self.outputLabel.config(font=("Courier", 44))
                        self.outputLabel.place(x=400, y=100)
                        break

                    frame = cv2.putText(
                        frame, poses[i] + ' : ' + str(images_saved_per_pose) +
                        '/' + str(IMAGE_PER_POSE), (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 1,
                        cv2.LINE_AA)
                    frame = cv2.rectangle(frame, (x1, y1), (x2, y2),
                                          (255, 255, 0), 2)
                    frame = imutils.resize(frame, width=300, height=300)
                    # OpenCV represents images in BGR order; however PIL
                    # represents images in RGB order, so we need to swap
                    # the channels, then convert to PIL and ImageTk format
                    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    image = Image.fromarray(image)
                    image = ImageTk.PhotoImage(image)
                    # if the panel is not None, we need to initialize it
                    if self.panel is None:
                        self.panel = Label(self.framePic, image=image)
                        self.panel.image = image
                        self.panel.pack(side=LEFT, padx=0, pady=0)
                        print("Done")
                    # otherwise, simply update the panel
                    else:
                        self.panel.configure(image=image)
                        self.panel.image = image

                    if saveit:
                        face = dlib.rectangle(x1, y1, x2, y2)
                        face_aligned = face_aligner.align(
                            original_frame, frame_gray, face)
                        cv2.imwrite(
                            os.path.join(
                                directory,
                                str(name) + '_' + str(number_of_images) +
                                '.jpg'), face_aligned)
                        print(images_saved_per_pose)
                        number_of_images += 1

            self.cap.release()
        else:
            if self.outputLabel != None:
                self.outputLabel.destroy()

            self.outputLabel = Label(self.framePic,
                                     text="Please Enter a Valid Id")
            self.outputLabel.config(font=("Courier", 44))
            self.outputLabel.place(x=400, y=100)