def get_all_poses(self, image, mark_detector=None, face_detector=None, draw=False, draw_marks=False):
    """Get all the poses in an image."""
    CNN_INPUT_SIZE = 128
    print("Initializing detectors")

    # Introduce mark_detector to detect landmarks.
    if mark_detector is None:
        from .mark_detector import MarkDetector
        mark_detector = MarkDetector()
    if face_detector is None:
        from .mark_detector import FaceDetector
        face_detector = FaceDetector()

    confidences, faceboxes = face_detector.get_faceboxes(image, threshold=0.2)
    poses = []
    print("{} FACEBOXES".format(len(faceboxes)))

    for facebox in faceboxes:
        if min(facebox) < 0:
            continue

        # Detect landmarks from image of 128x128.
        face_img = image[facebox[1]: facebox[3], facebox[0]: facebox[2]]
        if not face_img.shape[0] or not face_img.shape[1]:
            continue
        face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
        marks = mark_detector.detect_marks(face_img)

        # Convert the marks locations from local CNN to global image.
        marks *= (facebox[2] - facebox[0])
        marks[:, 0] += facebox[0]
        marks[:, 1] += facebox[1]

        if draw_marks:
            mark_detector.draw_marks(image, marks, color=(0, 0, 255))

        # Crop detected face, transform to gray scale, resize to 48x48.
        detected_face = image[facebox[1]: facebox[3], facebox[0]: facebox[2]]
        detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY)
        detected_face = cv2.resize(detected_face, (48, 48))

        # Try pose estimation with 68 points.
        pose = self.solve_pose_by_68_points(marks)
        #pose = pose_estimator.solve_pose(marks)
        poses.append((pose[0].copy(), pose[1].copy()))

        if draw:
            self.draw_annotation_box(image, pose[0], pose[1], color=(255, 128, 128))

    return poses
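# Each pose returned above is the (rotation_vector, translation_vector) pair
# produced by solving PnP on the 68 landmarks. Some of the scripts below work
# with yaw/pitch/roll angles instead; a sketch of one common way to derive
# them from the rotation vector with OpenCV is shown here. This is only an
# illustration, not necessarily the exact math used by PoseEstimator.get_angles
# or solve_pose_by_68_points in this repo.
def rotation_vector_to_euler(rotation_vector):
    """Convert a Rodrigues rotation vector to Euler angles in degrees."""
    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
    # RQDecomp3x3 returns the three Euler angles (in degrees) as its first value.
    euler_angles, _, _, _, _, _ = cv2.RQDecomp3x3(rotation_matrix)
    pitch, yaw, roll = euler_angles
    return pitch, yaw, roll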
def main(strargument): shutil.rmtree("./test") os.mkdir("./test") os.remove("result.txt") f = open("result.txt", "a") cap = cv2.VideoCapture(strargument) # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) #cap = cv2.VideoCapture("NTQ.mkv") #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi") #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg") # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview") success, frame = cap.read() startId = countIdFolder("./face_db/") # quit if unable to read the video file if not success: print('Failed to read video') sys.exit(1) #The color of the rectangle we draw around the face rectangleColor = (0, 165, 255) #variables holding the current frame number and the current faceid frameCounter = 0 currentFaceID = 0 #Variables holding the correlation trackers and the name per faceid conf = configparser.ConfigParser() conf.read("config/main.cfg") mtcnn_detector = load_mtcnn(conf) MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH") VERIFICATION_THRESHOLD = float( conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD")) FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH") BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH")) MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE")) MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL")) YAWL = int(conf.get("CUSTOM", "YAWL")) YAWR = int(conf.get("CUSTOM", "YAWR")) PITCHL = int(conf.get("CUSTOM", "PITCHL")) PITCHR = int(conf.get("CUSTOM", "PITCHR")) ROLLL = int(conf.get("CUSTOM", "ROLLL")) ROLLR = int(conf.get("CUSTOM", "ROLLR")) MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED")) IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH")) EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y")) EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X")) SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH")) MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN")) MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE")) LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME")) ROIXL = int(conf.get("CUSTOM", "ROIXL")) ROIXR = int(conf.get("CUSTOM", "ROIXR")) ROIYB = int(conf.get("CUSTOM", "ROIYB")) ROIYA = int(conf.get("CUSTOM", "ROIYA")) maxDisappeared = MAXDISAPPEARED ## khong xuat hien toi da 100 frame faces_db = load_faces(FACE_DB_PATH, mtcnn_detector) # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector) time.sleep(10) for item in faces_db: print(item["name"]) listTrackedFace = [] mark_detector = MarkDetector() tm = cv2.TickMeter() _, sample_frame = cap.read() height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) with tf.Graph().as_default(): with tf.Session() as sess: load_mobilefacenet(MODEL_PATH) inputs_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") try: start = time.time() while True: start1 = time.time() retval, frame = cap.read() #Increase the framecounter frameCounter += 1 if retval: _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR] cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB), (0, 0, 255), 2) good_face_index = [] # faces_db = load_face_db.face_db if (frameCounter % 1) == 0: ### embed and compare name for i, face_db in enumerate(faces_db): if not os.path.isdir( "./face_db/" + face_db["name"].split("_")[0]): faces_db.pop(i) faces, landmarks = mtcnn_detector.detect(_frame) if faces.shape[0] is not 0: input_images = np.zeros( (faces.shape[0], 112, 112, 3)) 
save_images = np.zeros( (faces.shape[0], 112, 112, 3)) (yaw, pitch, roll) = (0, 0, 0) for i, face in enumerate(faces): if round(faces[i, 4], 6) > IS_FACE_THRESH: bbox = faces[i, 0:4] points = landmarks[i, :].reshape( (5, 2)) nimg = face_preprocess.preprocess( _frame, bbox, points, image_size='112,112') save_images[i, :] = nimg nimg = nimg - 127.5 nimg = nimg * 0.0078125 input_images[i, :] = nimg (x1, y1, x2, y2) = bbox.astype("int") if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2: continue temp = int((y2 - y1) / EXTEND_Y) y1 = y1 + temp y2 = y2 + temp temp = int((x2 - x1) / EXTEND_X) if x1 > temp: x1 = x1 - temp x2 = x2 + temp # cv2.imshow("mainframe",frame) # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2]) face_img = cv2.resize( _frame[y1:y2, x1:x2], (128, 128)) # cv2.imshow("ok",face_img) face_img = cv2.cvtColor( face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks( [face_img]) tm.stop() marks *= (x2 - x1) marks[:, 0] += x1 marks[:, 1] += y1 # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) pose, ( yaw, pitch, roll ) = pose_estimator.solve_pose_by_68_points( marks) # temp = frame # cv2.putText(temp,"yaw: "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.putText(temp,"roll: "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.imshow("frame",temp) # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR): # good_face_index.append(i) # cv2.waitKey(0) # print(good_face_index) feed_dict = {inputs_placeholder: input_images} emb_arrays = sess.run(embeddings, feed_dict=feed_dict) emb_arrays = sklearn.preprocessing.normalize( emb_arrays) names = [] sims = [] for i, embedding in enumerate(emb_arrays): # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False: # names.append(listTrackedFace[i].name) # continue embedding = embedding.flatten() temp_dict = {} for com_face in faces_db: ret, sim = feature_compare( embedding, com_face["feature"], 0.65) temp_dict[com_face["name"]] = sim # print(temp_dict) dictResult = sorted(temp_dict.items(), key=lambda d: d[1], reverse=True) # print(dictResult[:5]) name = "" if len(dictResult) > 0 and dictResult[0][ 1] > VERIFICATION_THRESHOLD: name = dictResult[0][ 0] #.split("_")[0] sim = dictResult[0][1] ## wite log t = time.time() f.write(name + "___" + str((t - start) // 60) + ":" + str(int(t - start) % 60) + "\n") else: name = "unknown" sim = 0 names.append(name) sims.append(sim) # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:]) # if len(dictResult)>0 : # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:]) ################################ tracker for i, embedding in enumerate(emb_arrays): embedding = embedding.flatten() ResultDict = {} for objectTrackFace in listTrackedFace: tempList = [] (x1, y1, x2, y2) = objectTrackFace.latestBox for com_face in objectTrackFace.listEmbedding: ret, sim = feature_compare( embedding, com_face, 0.65) tempList.append(sim) tempList.sort(reverse=True) if len(tempList) > 0: if tempList[0] > 0.9 or ( similarIOU( faces[i, :4].astype( "int"), objectTrackFace. latestBox) and (frameCounter - objectTrackFace .latestFrameCounter) < 3): ResultDict[objectTrackFace. 
name] = tempList[0] dictResult = sorted(ResultDict.items(), key=lambda d: d[1], reverse=True) if True: if len( ResultDict ) > 0 and dictResult[0][ 1] > SIMILAR_THRESH: ## neu khop -- 0.5 # for ik in range(len(dict)): # if dict[ik][1]>SIMILAR_THRESH: nameTrackCurrent = dictResult[0][0] for index, tempFaceTrack in enumerate( listTrackedFace): if tempFaceTrack.name == nameTrackCurrent: if len(tempFaceTrack. listImage ) > MAX_LIST_LEN: tempFaceTrack.listImage.pop( 0) tempFaceTrack.listEmbedding.pop( 0) if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[ i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): tempFaceTrack.listImage.append( save_images[ i, :]) tempFaceTrack.listEmbedding.append( emb_arrays[i]) else: if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[ i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): tempFaceTrack.listImage.append( save_images[ i, :]) tempFaceTrack.listEmbedding.append( emb_arrays[i]) if names[i] != "unknown": if RepresentsInt( nameTrackCurrent ): tempFaceTrack.name = names[ i] # else: ################# # names[i] = nameTrackCurrent else: if not RepresentsInt( nameTrackCurrent ): names[ i] = nameTrackCurrent tempFaceTrack.countDisappeared = 0 tempFaceTrack.latestBox = faces[ i, 0:4].astype("int") tempFaceTrack.latestFrameCounter = frameCounter tempFaceTrack.liveTime = 0 tempFaceTrack.justAdded = True ## but we still action with it break else: ## neu khong khop thi tao moi nhung chi them anh khi mat du tot if len(ResultDict) > 0: print(dictResult[0][1]) if names[i] != "unknown": newTrackFace = trackedFace( names[i]) else: newTrackFace = trackedFace( str(currentFaceID)) currentFaceID = currentFaceID + 1 if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): newTrackFace.listImage.append( save_images[i, :]) newTrackFace.listEmbedding.append( emb_arrays[i]) newTrackFace.latestBox = faces[ i, 0:4].astype("int") newTrackFace.latestFrameCounter = frameCounter # print(newTrackFace.latestBox) newTrackFace.justAdded = True listTrackedFace.append( newTrackFace) ## add list ### disappeared for index, trackFace in enumerate( listTrackedFace): if trackFace.justAdded == False: trackFace.countDisappeared = trackFace.countDisappeared + 1 trackFace.liveTime = trackFace.liveTime + 1 else: trackFace.justAdded = False if trackFace.liveTime > LIVE_TIME: t = listTrackedFace.pop(index) del t if trackFace.countDisappeared > maxDisappeared: if len( trackFace.listImage ) < MIN_FACE_FOR_SAVE: ## neu chua duoc it nhat 5 mat thi xoa luon trackedFace.countDisappeared = 0 continue if trackFace.saveTrackedFace( "./temp/", startId): startId = startId + 1 t = listTrackedFace.pop(index) del t for i, face in enumerate(faces): x1, y1, x2, y2 = faces[i][0], faces[i][ 1], faces[i][2], faces[i][3] x1 = max(int(x1), 0) y1 = max(int(y1), 0) x2 = min(int(x2), _frame.shape[1]) y2 = min(int(y2), _frame.shape[0]) cv2.rectangle(frame, (x1 + ROIXL, y1 + ROIYA), (x2 + ROIXL, y2 + ROIYA), (0, 255, 0), 2) # if i in good_face_index: # if not RepresentsInt(names[i]): cv2.putText(frame, names[i].split("_")[0], (int(x1 / 2 + x2 / 2 + ROIXL), int(y1 + ROIYA)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2) else: for index, trackFace in enumerate( listTrackedFace): trackFace.countDisappeared = trackFace.countDisappeared + 1 trackFace.liveTime = trackFace.liveTime + 1 if trackFace.liveTime > LIVE_TIME: t = listTrackedFace.pop(index) del t continue 
if trackFace.countDisappeared > maxDisappeared: if len( trackFace.listImage ) < MIN_FACE_FOR_SAVE: ## neu chua duoc it nhat 5 mat thi xoa luon trackedFace.countDisappeared = 0 continue if trackFace.saveTrackedFace( "./temp/", startId): startId = startId + 1 t = listTrackedFace.pop(index) del t end = time.time() cv2.putText(frame, "FPS: " + str(1 // (end - start1)), (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3) cv2.putText( frame, "Time: " + str((end - start) // 60) + ":" + str(int(end - start) % 60), (200, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3) cv2.imshow("frame", frame) key = cv2.waitKey(30) if key & 0xFF == ord('q'): break if key == 32: cv2.waitKey(0) else: break except KeyboardInterrupt as e: pass gc.collect() cv2.destroyAllWindows() exit(0)
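# feature_compare, used throughout the tracking loop above as
# "ret, sim = feature_compare(embedding, com_face['feature'], 0.65)", is not
# shown in this section. A minimal sketch of such a comparison, assuming
# cosine similarity between MobileFaceNet embeddings and a caller-supplied
# threshold (the real helper in this repo may differ); the query embeddings
# are normalized with sklearn.preprocessing.normalize upstream, but the norms
# are divided out here anyway for safety.
import numpy as np

def feature_compare_sketch(feature1, feature2, threshold):
    """Return (is_same_person, similarity) for two face embeddings."""
    f1 = np.asarray(feature1).flatten()
    f2 = np.asarray(feature2).flatten()
    # Cosine similarity of the two embedding vectors.
    sim = float(np.dot(f1, f2) / (np.linalg.norm(f1) * np.linalg.norm(f2) + 1e-10))
    return sim > threshold, sim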
def Image_Database_Generator():
    mark_detector = MarkDetector()
    video_capture = cv2.VideoCapture(file)

    name = input("Enter student id:")
    directory = os.path.join(facepath, name)
    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok=True)
    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    number_of_images = 0
    MAX_NUMBER_OF_IMAGES = 50
    count = 0

    # Load the landmark predictor and face aligner once, before the loop.
    shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH)

    while number_of_images < MAX_NUMBER_OF_IMAGES:
        ret, frame = video_capture.read()
        count += 1
        if not ret:
            break
        if count % 5 != 0:  # Only process every 5th frame.
            continue

        frame = cv2.flip(frame, 1)
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        original_frame = frame.copy()
        height, width = frame.shape[:2]

        facebox = mark_detector.extract_cnn_facebox(frame)
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            x1 = max(facebox[0], 0)
            x2 = min(facebox[2], width)
            y1 = max(facebox[1], 0)
            y2 = min(facebox[3], height)
            face = frame[y1:y2, x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks([face_img])

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Show preview.
            if showit:
                frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2)
                cv2.imshow("Preview", frame)

            face = dlib.rectangle(x1, y1, x2, y2)
            face_aligned = face_aligner.align(original_frame, frame_gray, face)
            cv2.imwrite(os.path.join(directory,
                                     str(name) + '_' + str(number_of_images) + '.jpg'),
                        face_aligned)
            number_of_images += 1
            print(number_of_images)

        if cv2.waitKey(10) == 27:
            break

    print('Thank you')
    video_capture.release()
    cv2.destroyAllWindows()
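# Image_Database_Generator relies on several module-level names that are not
# shown in this snippet (file, facepath, CNN_INPUT_SIZE, FACE_WIDTH, showit).
# Plausible definitions, purely as an illustration of what the function
# expects -- the real values live elsewhere in the repo:
file = 0                   # 0 selects the default webcam for cv2.VideoCapture
facepath = "./face_db"     # root folder holding one sub-folder per student id
CNN_INPUT_SIZE = 128       # input resolution expected by MarkDetector
FACE_WIDTH = 160           # output width passed to the imutils FaceAligner
showit = True              # toggle the cv2.imshow preview window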
# 3. estimate pose
# Feed frame to image queue.
img_queue.put(frame)

# Get face from box queue.
facebox = box_queue.get()

if facebox is not None:
    # Detect landmarks from image of 128x128.
    face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
    face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

    tm.start()
    marks = mark_detector.detect_marks(face_img)
    tm.stop()

    # Convert the marks locations from local CNN to global image.
    marks *= (facebox[2] - facebox[0])
    marks[:, 0] += facebox[0]
    marks[:, 1] += facebox[1]

    # Uncomment following line to show raw marks.
    # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
    # Uncomment following line to show facebox.
    # mark_detector.draw_box(frame, [facebox])

    # Try pose estimation with 68 points.
    marks = np.array([[218.4677, 89.87073],
                      [220.22453, 121.96762],
class PoseHeadNode(object): def __init__(self): # subscribe rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb) rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback) # publish #self.pub_headpose_info = rospy.Publisher("/headpose_info", HeadPoseInfo, \ # queue_size=10) #data self.humans = None self.color_img = None self.init() def init(self): self.detector = RetinaFace('./model/R50', 0, 0, 'net3') self.mark_detector = MarkDetector() self.pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] sample_img = None #if self.color_img is None: # print("None") # time.sleep(0.2) # #height, width, _ = self.color_img.shape self.pose_estimator = PoseEstimator(img_size=(480, 640)) def pose_cb(self, data): if self.color_img is None: return h, w = self.color_img.shape[:2] # ros topic to Person instance humans = [] for p_idx, person in enumerate(data.persons): human = Human([]) for body_part in person.body_part: part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence) human.body_parts[body_part.part_id] = part humans.append(human) self.humans = humans #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False) def draw_pose(self, img, humans): if img is None or humans is None: #print("Cannot draw pose on {} image and {} humans".format(img, humans)) return None return TfPoseEstimator.draw_humans(img, humans, imgcopy=False) def color_callback(self, data): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function #print("Having color image") #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding) #print(cv_image) decoded = np.frombuffer(data.data, np.uint8) img = np.reshape(decoded, (480,640, 3)) print("COLOR_CALLBACK", img.shape) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) self.color_img = img def publish_headpose_info(self, viz=True): thresh = 0.8 time.sleep(0.1) if self.color_img is None: print("Color img None") return im_shape = self.color_img.shape #scales = [1024, 1980] scales = [480*2, 640*2] target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(target_size) / float(im_size_min) thresh = 0.8 if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) scales = [im_scale] frame = copy.deepcopy(self.color_img) t1 = time.time() faceboxes, landmark = self.detector.detect(frame, \ thresh, scales=scales, do_flip=False) mess = "Not detect pose" if faceboxes is not None and len(faceboxes) > 0: #if isinstance(faceboxes[1], int): # faceboxes = [faceboxes] for facebox in faceboxes: facebox = facebox.astype(np.int) # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = self.mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. self.mark_detector.draw_marks( frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = self.pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. 
steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, self.pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. self.pose_estimator.draw_annotation_box( frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. t2 = time.time() mess=round(1/(t2-t1), 2) #self.pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1]) cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \ cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2) # Show preview. if viz: frame = self.draw_pose(frame, self.humans) if frame is None: return cv2.imshow("Preview", frame) cv2.waitKey(1)
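# Each scalar of the pose (three rotation plus three translation values) is
# smoothed by a Stabilizer(state_num=2, measure_num=1, ...). A minimal sketch
# of what such a scalar stabilizer can look like on top of cv2.KalmanFilter,
# assuming a constant-velocity state model; the Stabilizer class shipped with
# this code may be implemented differently.
import cv2
import numpy as np

class ScalarStabilizerSketch:
    def __init__(self, cov_process=0.1, cov_measure=0.1):
        # State: [value, velocity]; measurement: [value].
        self.filter = cv2.KalmanFilter(2, 1)
        self.filter.transitionMatrix = np.array([[1, 1], [0, 1]], np.float32)
        self.filter.measurementMatrix = np.array([[1, 0]], np.float32)
        self.filter.processNoiseCov = np.eye(2, dtype=np.float32) * cov_process
        self.filter.measurementNoiseCov = np.eye(1, dtype=np.float32) * cov_measure
        self.state = np.zeros((2, 1), np.float32)

    def update(self, measurement):
        # Predict, then correct with the new scalar measurement.
        self.filter.predict()
        self.state = self.filter.correct(
            np.array([[np.float32(measurement[0])]]))
        return self.state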
def main(): ''' main function''' img_dir = "img_dir/" # image file dir # setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_list = [] for filename in os.listdir(img_dir): frame = cv2.imread(img_dir + filename) img_list.append(frame) img_queue.put(frame) box_process = Process(target=get_face, args=(img_queue, box_queue)) box_process.start() # introduce mark_detector to detect landmarks mark_detector = MarkDetector() while True: # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # flipcode > 0: horizontal flip; flipcode = 0: vertical flip; flipcode < 0: horizental and vertical flip # frame = cv2.flip(frame, 2) if len(img_list) != 0: frame = img_list.pop(0) raw_frame = frame.copy() # introduce pos estimator to solve pose. Get one frames to setup the # estimator according to the images size. height, width = frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) else: break # Pose estimation by 3 steps. # 1. detect faces. # 2. detect landmarks. # 3. estimate pose. # get face from box queue. faceboxes = box_queue.get() # print("the length of facebox is " + str(len(faceboxes))) for facebox in faceboxes: # detect landmarks from image 128 * 128 face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] # cut off the area of face. face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) # BGR -> RGB marks = mark_detector.detect_marks(face_img) # covert the marks locations from local CNN to global image. marks[:, 0] *= (facebox[2] - facebox[0]) # the width of rectangle. marks[:, 1] *= (facebox[3] - facebox[1]) # the length of rectangle. marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # uncomment following line to show raw marks. # util.draw_marks(frame, marks, color=(0, 255, 0)) # util.draw_faceboxes(frame, facebox) # try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # pose[0] is rotation_vector, and pose[1] is translation_vector pose_np = np.array(pose).flatten() pose = np.reshape(pose_np, (-1, 3)) angle = util.get_angle(pose[0]) pose_estimator.get_warp_affined_image(frame, facebox, marks, angle[2]) # show preview cv2.imshow("Preview", frame) cv2.imshow("raw_frame", raw_frame) if cv2.waitKey(0) == 27: continue # clean up the multiprocessing process. box_process.terminate() box_process.join()
def main():
    # Initiate Class for Face & Mark Detector
    face_detector = FaceDetector()
    mark_detector = MarkDetector()

    # Landmark 3D for projection and landmark 2d index of corresponding mark
    landmarks_3d = np.array(
        [
            [0.000000, 0.000000, 6.763430],    # 33 nose bottom edge
            [6.825897, 6.760612, 4.402142],    # 17 left brow left corner
            [1.330353, 7.122144, 6.903745],    # 21 left brow right corner
            [-1.330353, 7.122144, 6.903745],   # 22 right brow left corner
            [-6.825897, 6.760612, 4.402142],   # 26 right brow right corner
            [5.311432, 5.485328, 3.987654],    # 36 left eye left corner
            [1.789930, 5.393625, 4.413414],    # 39 left eye right corner
            [-1.789930, 5.393625, 4.413414],   # 42 right eye left corner
            [-5.311432, 5.485328, 3.987654],   # 45 right eye right corner
            [2.005628, 1.409845, 6.165652],    # 31 nose left corner
            [-2.005628, 1.409845, 6.165652],   # 35 nose right corner
            [2.774015, -2.080775, 5.048531],   # 48 mouth left corner
            [-2.774015, -2.080775, 5.048531],  # 54 mouth right corner
            [0.000000, -3.116408, 6.097667],   # 57 mouth central bottom corner
            [0.000000, -7.415691, 4.070434]    # 8 chin corner
        ],
        dtype=np.double)
    lm_2d_index = [33, 17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8]

    # Define color for facebox
    color = (244, 134, 66)

    # Initiate Video Streaming
    file_name = 'assets/trump.gif'
    vs = FileVideoStream(file_name).start()
    time.sleep(2.0)

    frame = vs.read()
    while frame is not None:
        frame = imutils.resize(frame, width=800, height=600)
        (H, W) = frame.shape[:2]

        faceboxes = face_detector.extract_square_facebox(frame)
        if faceboxes is not None:
            for facebox in faceboxes:
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                marks = mark_detector.detect_marks(face_img)
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                rvec, tvec, cm, dc = get_headpose(h=H, w=W,
                                                  landmarks_2d=marks,
                                                  landmarks_3d=landmarks_3d,
                                                  lm_2d_index=lm_2d_index)
                draw_front_box(frame, color, rvec, tvec, cm, dc)

        cv2.imshow("3D Face Box", frame)
        # writer.write(frame)

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
        if key == ord("w"):
            frame = vs.read()
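# get_headpose, called above, is not shown in this section. A sketch of what
# it plausibly does: pick the 15 detected 2D landmarks listed in lm_2d_index,
# build an approximate pinhole camera matrix from the frame size, and solve
# the perspective-n-point problem. The actual helper in the repo may differ.
import cv2
import numpy as np

def get_headpose_sketch(h, w, landmarks_2d, landmarks_3d, lm_2d_index):
    # Approximate camera internals: focal length ~ image width,
    # principal point at the image center, no lens distortion.
    focal_length = w
    camera_matrix = np.array([[focal_length, 0, w / 2.0],
                              [0, focal_length, h / 2.0],
                              [0, 0, 1]], dtype=np.double)
    dist_coeffs = np.zeros((4, 1))

    # Keep only the 2D marks that correspond to the 15 reference 3D points.
    image_points = np.array([landmarks_2d[i] for i in lm_2d_index],
                            dtype=np.double)

    _, rvec, tvec = cv2.solvePnP(landmarks_3d, image_points,
                                 camera_matrix, dist_coeffs)
    return rvec, tvec, camera_matrix, dist_coeffs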
def Camera_Capture(): mark_detector = MarkDetector() name = input("Enter student id:") directory = os.path.join(facepath, name) if not os.path.exists(facepath): os.makedirs(facepath, exist_ok = 'True') if not os.path.exists(directory): try: os.makedirs(directory, exist_ok = 'True') except OSError as e: if e.errno != errno.EEXIST: print('invalid student id or access denied') return poses=['frontal','right','left','up','down'] file=0 cap = cv2.VideoCapture(file) ret, sample_frame = cap.read() i = 0 count = 0 if ret==False: return # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] images_saved_per_pose=0 number_of_images = 0 shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH) while i<5: saveit = False # Read frame, crop it, flip it, suits your needs. ret, frame = cap.read() if ret is False: break if count % 5 !=0: # skip 5 frames count+=1 continue if images_saved_per_pose==IMAGE_PER_POSE: i+=1 images_saved_per_pose=0 # If frame comes from webcam, flip it so it looks like a mirror. if file == 0: frame = cv2.flip(frame, 2) original_frame=frame.copy() frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. x1=max(facebox[0]-0,0) x2=min(facebox[2]+0,width) y1=max(facebox[1]-0,0) y2=min(facebox[3]+0,height) face = frame[y1: y2,x1:x2] face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # print(steady_pose[0][0]) # if steady_pose[0][0]>0.1: # print('right') # else: # if steady_pose[0][0]<-0.1: # print('left') # if steady_pose[0][1]>0.1: # print('down') # else: # if steady_pose[0][1]<-0.1: # print('up') # print(steady_pose[0]) if i==0: if abs(steady_pose[0][0])<ANGLE_THRESHOLD and abs(steady_pose[0][1])<ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==1: if steady_pose[0][0]>ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==2: if steady_pose[0][0]<-ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==3: if steady_pose[0][1]<-ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==4: if steady_pose[0][1]>ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True # Show preview. 
if i>=5: print ('Thank you') break frame = cv2.putText(frame,poses[i] +' : '+ str(images_saved_per_pose)+'/'+str(IMAGE_PER_POSE),(10,50),cv2.FONT_HERSHEY_SIMPLEX,2,(255,255,255),1,cv2.LINE_AA) frame = cv2.rectangle(frame, (x1,y1), (x2,y2),(255,255,0),2) cv2.imshow("Preview", frame) if saveit: face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align(original_frame, frame_gray, face) cv2.imwrite(os.path.join(directory, str(name)+'_'+str(number_of_images)+'.jpg'), face_aligned) print(images_saved_per_pose) number_of_images+=1 if cv2.waitKey(10) == 27: break cap.release() cv2.destroyAllWindows()
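# The chain of "if i==0 ... if i==4" checks in Camera_Capture classifies the
# stabilized rotation into the five capture poses. The same decision, written
# as a helper for clarity (a sketch; thresholds and axis conventions are taken
# from the code above, where steady_pose[0][0] grows when looking right and
# steady_pose[0][1] grows when looking down):
def matches_requested_pose(pose_index, steady_rotation, angle_threshold):
    """Return True when the current head rotation matches poses[pose_index]."""
    rx, ry = steady_rotation[0], steady_rotation[1]
    if pose_index == 0:   # frontal
        return abs(rx) < angle_threshold and abs(ry) < angle_threshold
    if pose_index == 1:   # right
        return rx > angle_threshold
    if pose_index == 2:   # left
        return rx < -angle_threshold
    if pose_index == 3:   # up
        return ry < -angle_threshold
    if pose_index == 4:   # down
        return ry > angle_threshold
    return False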
class PoseHeadNode(object): def __init__(self): # subscribe rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb) rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback) # publish self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo, \ queue_size=10) #data self.humans = None self.color_img = None self.init() def init(self): self.detector = RetinaFace('./model/R50', 0, 0, 'net3') self.mark_detector = MarkDetector() self.pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] sample_img = None #if self.color_img is None: # print("None") # time.sleep(0.2) # #height, width, _ = self.color_img.shape self.pose_estimator = PoseEstimator(img_size=(480, 640)) def pose_cb(self, data): if self.color_img is None: return h, w = self.color_img.shape[:2] # ros topic to Person instance humans = [] for p_idx, person in enumerate(data.persons): human = Human([]) for body_part in person.body_part: part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence) human.body_parts[body_part.part_id] = part humans.append(human) self.humans = humans #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False) def draw_pose(self, img, humans, parts=[4, 7]): if img is None or humans is None: #print("Cannot draw pose on {} image and {} humans".format(img, humans)) return None image_h, image_w, _ = img.shape if parts is None or len(parts) == 0: return TfPoseEstimator.draw_humans(img, humans, imgcopy=False) else: for human in humans: for part in parts: if part in human.body_parts.keys(): body_part = human.body_parts[part] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) img = cv2.circle(img, coord, 2, (0, 255, 0)) return img def color_callback( self, data ): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function #print("Having color image") #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding) #print(cv_image) decoded = np.frombuffer(data.data, np.uint8) img = np.reshape(decoded, (480, 640, 3)) #print("COLOR_CALLBACK", img.shape) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) self.color_img = img def get_top_vertice_human(self, human, img_w, img_h): sorted_id = sorted(human["pose"].body_parts.keys()) body_part = human["pose"].body_parts[sorted_id[0]] return (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) def is_gazing(self, face_direction, human, img): def get_angle_direct_with_peakhand(face_direction, peak): d0, d1 = face_direction peak_direction = (peak[0] - d0[0], peak[1] - d0[1]) face_direct = (d1[0] - d0[0], d1[1] - d0[1]) d_peak_direction = np.sqrt(peak_direction[0]**2 + peak_direction[1]**2) d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2) return (peak_direction[0]*face_direct[0] + \ peak_direction[1]*face_direct[1])/(d_peak_direction*\ d_face_direct) thresh = 0.1 #0.5 #cos(PI/3) image_h, image_w, _ = img.shape if 4 in human.body_parts.keys(): body_part = human.body_parts[4] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) print("angle left = ", get_angle_direct_with_peakhand(face_direction, coord)) return get_angle_direct_with_peakhand(face_direction, coord) > thresh if 7 in human.body_parts.keys(): body_part = human.body_parts[7] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) print("angle right = ", get_angle_direct_with_peakhand(face_direction, coord)) return 
get_angle_direct_with_peakhand(face_direction, coord) > thresh return False def get_gazing_status(self, face_directions, face_coords, humans, img): def get_dist_to_humans(human_coords, coord): result = [] for _coord in human_coords: result.append(np.sqrt((coord[0]-_coord[0])**2) + \ (coord[1]-_coord[1])**2) return result image_h, image_w, _ = img.shape gazing_status = [] human_coords = [] for human in humans: sorted_id = sorted(human.body_parts.keys()) body_part = human.body_parts[sorted_id[0]] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) human_coords.append(coord) for fdirection, fcoord in zip(face_directions, face_coords): dists = get_dist_to_humans(human_coords, fcoord) idx = np.argmin(dists) gazing_status.append(self.is_gazing(fdirection, humans[idx], img)) del human_coords[idx] return gazing_status def publish_headpose_info(self, viz=True): thresh = 0.8 time.sleep(0.1) if self.color_img is None or self.humans is None: print("Color img None") return im_shape = self.color_img.shape #scales = [1024, 1980] scales = [480 * 2, 640 * 2] target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(target_size) / float(im_size_min) thresh = 0.8 if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) scales = [im_scale] frame = copy.deepcopy(self.color_img) t1 = time.time() faceboxes = self.mark_detector.extract_cnn_facebox(frame) mess = "Not detect pose" face_directions = [] face_coords = [] if faceboxes is not None and len(faceboxes) > 0: if isinstance(faceboxes[1], int): faceboxes = [faceboxes] for facebox in faceboxes: #facebox = facebox.astype(np.int) # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_coords.append((int((facebox[0]+facebox[2])/2), \ int((facebox[1]+facebox[3])/2))) face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = self.mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = self.pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, self.pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. face_direction = self.pose_estimator.draw_annotation_box( frame, pose[0], pose[1], color=(255, 128, 128)) face_directions.append(face_direction) # Uncomment following line to draw stabile pose annotation on frame. t2 = time.time() mess = round(1 / (t2 - t1), 2) #self.pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. 
#self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1]) gazing_status = self.get_gazing_status(face_directions, face_coords, \ copy.deepcopy(self.humans), self.color_img) msg = GazingInfo() msg.is_gazing = [] msg.coords = [] for coord, gazestatus in zip(face_coords, gazing_status): _coord = Coord2D() _coord.x = coord[0] _coord.y = coord[1] msg.coords.append(_coord) msg.is_gazing.append(gazestatus) self.pub_headpose_info.publish(msg) print(gazing_status) cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \ cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2) # Show preview. if viz: frame = self.draw_pose(frame, self.humans) if frame is None: return cv2.imshow("Preview", frame) cv2.waitKey(1)
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: engine.say( "Warning: video source not assigned, default webcam will be used") engine.runAndWait() video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() gaze = GazeTracking() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() head_flag = 0 gaze_flag = 0 while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] #audio_record(AUDIO_OUTPUT, 3) #sphinx_recog(AUDIO_OUTPUT) # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() gaze.refresh(frame) frame = gaze.annotated_frame() text = "" if facebox is not None: # Detect landmarks from image of 128x128. 
face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) gray = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) rects = detector(gray, 0) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # for rect in rects: # determine the facial landmarks for the face region, then # convert the facial landmark (x, y)-coordinates to a NumPy # array shape = predictor(gray, rect) shape = face_utils.shape_to_np( shape) #converting to NumPy Array矩阵运算 mouth = shape[Start:End] leftEye = shape[lStart:lEnd] rightEye = shape[rStart:rEnd] leftEAR = eye_aspect_ratio(leftEye) #眼睛长宽比 rightEAR = eye_aspect_ratio(rightEye) ear = (leftEAR + rightEAR) / 2.0 #长宽比平均值 lipdistance = lip_distance(shape) if (lipdistance > YAWN_THRESH): #print(lipdistance) flag0 += 1 print("yawning time: ", flag0) if flag0 >= 40: cv2.putText(frame, "Yawn Alert", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) cv2.putText(frame, "Yawn Alert", (220, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) engine.say("don't yawn") engine.runAndWait() flag0 = 0 else: flag0 = 0 if (ear < thresh): flag += 1 print("eyes closing time: ", flag) if flag >= frame_check: cv2.putText(frame, "****************ALERT!****************", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) cv2.putText(frame, "****************ALERT!****************", (10, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) engine.say("open your eyes") engine.runAndWait() flag = 0 else: flag = 0 if gaze.is_right(): print("Looking right") text = "Looking right" elif gaze.is_left(): print("Looking left") text = "Looking left" elif gaze.is_up(): text = "Looking up" else: text = "Looking center" if text is not "Looking center": gaze_flag += 1 if gaze_flag >= 20: engine.say("look forward") engine.runAndWait() gaze_flag = 0 else: gaze_flag = 0 marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # get angles angles = pose_estimator.get_angles(pose[0], pose[1]) if ((-8 > angles[0] or angles[0] > 8) or (-8 > angles[1] or angles[1] > 8)): head_flag += 1 if head_flag >= 40: print(angles[0]) engine.say("please look ahead") engine.runAndWait() else: head_flag = 0 # pose_estimator.draw_info(frame, angles) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) #pose_estimator.show_3d_model # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(1) & 0xFF == ord("q"): break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
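# eye_aspect_ratio and lip_distance, used in the drowsiness checks above, are
# defined elsewhere in the project. For reference, a sketch of the widely used
# eye-aspect-ratio formulation the blink/closed-eye check appears to rely on
# (the project's own helper may differ in detail):
from scipy.spatial import distance as dist

def eye_aspect_ratio_sketch(eye):
    """eye: the six (x, y) landmark points of one eye, in dlib's 68-point order."""
    # Vertical distances between the two pairs of upper/lower eyelid points.
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    # Horizontal distance between the eye corners.
    C = dist.euclidean(eye[0], eye[3])
    # The ratio collapses toward 0 when the eye closes.
    return (A + B) / (2.0 * C)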
def main(video_src): """MAIN :param video_src: Source of video to analyze :type video_src: str or int""" cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() face_detector = FaceDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,)) #box_process = Process(target=get_faces, args=(face_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] #face expression recognizer initialization from keras.models import model_from_json model = model_from_json(open("./model/facial_expression_model_structure.json", "r").read()) model.load_weights('./model/facial_expression_model_weights.h5') #load weights #----------------------------- emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral') while True: input() # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. #facebox = box_queue.get() faceboxes = dump_queue(box_queue) print("{} FACEBOXES".format(len(faceboxes))) for facebox in faceboxes: if min(facebox) < 0: continue # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] if not face_img.shape[0] or not face_img.shape[1]: continue face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) detected_face = frame[facebox[1]: facebox[3],facebox[0]: facebox[2]] #crop detected face detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY) #transform to gray scale detected_face = cv2.resize(detected_face, (48, 48)) #resize to 48x48 # emotion estimation img_pixels = image.img_to_array(detected_face) img_pixels = np.expand_dims(img_pixels, axis = 0) img_pixels /= 255 #pixels are in scale of [0, 255]. 
normalize all pixels in scale of [0, 1] predictions = model.predict(img_pixels) #store probabilities of 7 expressions #find max indexed array 0: angry, 1:disgust, 2:fear, 3:happy, 4:sad, 5:surprise, 6:neutral max_index = np.argmax(predictions[0]) emotion = emotions[max_index] #write emotion text above rectangle #V_FONT_HERSHEY_SIMPLEX normal size sans-serif font #CV_FONT_HERSHEY_PLAIN small size sans-serif font #CV_FONT_HERSHEY_DUPLEX normal size sans-serif font (more complex than CV_FONT_HERSHEY_SIMPLEX ) #CV_FONT_HERSHEY_COMPLEX normal size serif font #CV_FONT_HERSHEY_TRIPLEX normal size serif font (more complex than CV_FONT_HERSHEY_COMPLEX ) #CV_FONT_HERSHEY_COMPLEX_SMALL smaller version of CV_FONT_HERSHEY_COMPLEX #CV_FONT_HERSHEY_SCRIPT_SIMPLEX hand-writing style font #CV_FONT_HERSHEY_SCRIPT_COMPLEX more complex variant of CV_FONT_HERSHEY_SCRIPT_SIMPLEX image_text = "" for index in range(len(emotions)): if predictions[0][index]>0.3: image_text += "{0} : {1} %\n".format(emotions[index], int(predictions[0][index]*100)) space = 0 for text in image_text.strip().split("\n"): cv2.putText( img = frame, text= text, org =(int(facebox[0]), int(facebox[1])-space), fontFace = cv2.FONT_HERSHEY_PLAIN, fontScale = 0.8, color = (255,255,255), thickness = 1 ) space += int(0.25*48) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) #pose = pose_estimator.solve_pose(marks) # Stabilize the pose. #stabile_pose = [] #pose_np = np.array(pose).flatten() #for value, ps_stb in zip(pose_np, pose_stabilizers): # ps_stb.update([value]) # stabile_pose.append(ps_stb.state[0]) #stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. pose_estimator.draw_annotation_box( frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. #pose_estimator.draw_annotation_box( # frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
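# dump_queue, used above to pull every facebox currently sitting in box_queue,
# is not shown in this section. A sketch of what it plausibly does: drain the
# multiprocessing queue without blocking and return whatever was queued.
import queue

def dump_queue_sketch(q):
    """Return all items currently available in the queue."""
    items = []
    while True:
        try:
            items.append(q.get_nowait())
        except queue.Empty:
            break
    return items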
class HeadPoseEstimator():

    def __init__(self):
        print("OpenCV version: {}".format(cv2.__version__))

        self.CNN_INPUT_SIZE = 128
        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))
        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(
                                       self.mark_detector,
                                       self.img_queue,
                                       self.box_queue,
                                   ))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool,
            self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name", "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image,
                                               self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param(
            "show_annotation_box", True)
        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)

        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)
        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()

    def get_face(self, detector, img_queue, box_queue):
        """Take faces out of the image queue; runs in a separate process."""
        while not rospy.is_shutdown():
            image = img_queue.get()
            box = detector.extract_cnn_facebox(image)
            box_queue.put(box)

    def process_ctrl_callback(self, msg):
        self.process_ctrl_flag = msg.data

    def euler_to_quaternion(self, euler):
        # Convert Euler angles to a quaternion.
        # The components are rearranged so the result is mirrored.
        q = tf.transformations.quaternion_from_euler(euler.x, euler.y, euler.z)
        return Quaternion(x=q[1], y=-q[2], z=-q[3], w=q[0])
        #return Quaternion(x=q[0], y=q[1], z=q[2], w=q[3])

    def img_callback(self, img_msg):
        if self.process_ctrl_flag:
            try:
                cv_img = CvBridge().imgmsg_to_cv2(img_msg, "bgr8")
                flip_frame = cv2.flip(cv_img, 2)  # Mirror the image.
                frame = cv_img
                img_height, img_width, _ = frame.shape[:3]

                self.img_queue.put(frame)
                facebox = self.box_queue.get()

                # Face detection succeeded.
                if facebox is not None:
                    # Crop the face from the frame and find landmarks with the CNN.
                    # img[top : bottom, left : right]
                    face_img = frame[facebox[1]:facebox[3],
                                     facebox[0]:facebox[2]]
                    face_img = cv2.resize(
                        face_img, (self.CNN_INPUT_SIZE, self.CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    face_rect = FaceRect()
                    face_rect.x = facebox[0]
                    face_rect.y = facebox[1]
                    face_rect.width = facebox[2] - facebox[0]
                    face_rect.height = facebox[3] - facebox[1]
                    #face_rect.x = img_width - facebox[0] - (facebox[2] - facebox[0])
                    #face_rect.y = facebox[1]
                    #face_rect.width = facebox[2] - facebox[0]
                    #face_rect.height = facebox[3] - facebox[1]

                    marks = self.mark_detector.detect_marks([face_img])
                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Uncomment following line to show raw marks.
                    #self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
                    # Uncomment following line to show facebox.
                    #self.mark_detector.draw_box(frame, [facebox])

                    pose = self.pose_estimator.solve_pose_by_68_points(marks)
                    #rotation = pose[0]
                    #euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    #quaternion = self.euler_to_quaternion(euler)

                    # Stabilize the pose.
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    rotation = steady_pose[0]
                    euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    head_rotation = self.euler_to_quaternion(euler)
                    #print(head_rotation)
                    #print("---")

                    head_pose = HeadPose()
                    head_pose.face_rect = face_rect
                    head_pose.head_rotation = head_rotation
                    self.result_publisher.publish(head_pose)

                    # Uncomment following line to draw pose annotation on frame.
                    #self.pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128))
                    # Draw the stabilized pose annotation on the frame.
                    if self.show_annotation_box_flag:
                        self.pose_estimator.draw_annotation_box(
                            frame, steady_pose[0], steady_pose[1],
                            color=(128, 255, 128))

                    # draw_axes does not work with OpenCV 3.4.0, so draw_axis
                    # is used instead.
                    #self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
                    if self.show_axis_flag:
                        self.pose_estimator.draw_axis(frame, steady_pose[0],
                                                      steady_pose[1])

                if self.show_result_img_flag:
                    cv2.imshow("result", frame)
                    cv2.waitKey(1)

                if self.pub_result_img_flag:
                    try:
                        pub_img = CvBridge().cv2_to_imgmsg(frame, "bgr8")
                        self.img_publisher.publish(pub_img)
                    except CvBridgeError as e:
                        rospy.logerr(str(e))
            except CvBridgeError as e:
                rospy.logerr(str(e))
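# A minimal entry point for running the node above, assuming the surrounding
# script has already imported rospy and defines HeadPoseEstimator as shown
# (the node name here is illustrative):
if __name__ == "__main__":
    rospy.init_node("head_pose_estimator")
    estimator = HeadPoseEstimator()
    rospy.spin()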
def run(): # Load the parameters conf = config() # initialize dlib's face detector (HOG-based) and then create the # facial landmark predictor print("[INFO] loading facial landmark predictor...") detector = dlib.get_frontal_face_detector() predictor = dlib.shape_predictor(conf.shape_predictor_path) # grab the indexes of the facial landmarks for the left and # right eye, respectively (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] # initialize the video stream and sleep for a bit, allowing the # camera sensor to warm up # cap = cv2.VideoCapture(conf.vedio_path) cap = cv2.VideoCapture(0) if conf.vedio_path == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # sample_frame = imutils.rotate(sample_frame, 90) # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() # Gaze tracking gaze = GazeTracking() # loop over the frames from the video stream temp_steady_pose = 0 while True: # grab the frame from the threaded video stream, resize it to # have a maximum width of 400 pixels, and convert it to # grayscale frame_got, frame = cap.read() # Empty frame frame_empty = np.zeros(frame.shape) # frame = imutils.rotate(frame, 90) frame = imutils.resize(frame, width=400) gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # detect faces in the grayscale frame rects = detector(gray, 0) # check to see if a face was detected, and if so, draw the total # number of faces on the frame if len(rects) > 0: text = "{} face(s) found".format(len(rects)) # cv2.putText(frame, text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0, 0, 255), 2) # Empty frame cv2.putText(frame_empty, text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) # initialize the frame counters and the total number of blinks TOTAL = 0 COUNTER = 0 # loop over the face detections for (i, rect) in enumerate(rects): # determine the facial landmarks for the face region, then # convert the facial landmark (x, y)-coordinates to a NumPy # array shape = predictor(gray, rect) shape = face_utils.shape_to_np(shape) # ******************************** # Blink detection # extract the left and right eye coordinates, then use the # coordinates to compute the eye aspect ratio for both eyes leftEye = shape[lStart:lEnd] rightEye = shape[rStart:rEnd] leftEAR = eye_aspect_ratio(leftEye) rightEAR = eye_aspect_ratio(rightEye) # average the eye aspect ratio together for both eyes ear = (leftEAR + rightEAR) / 2.0 # compute the convex hull for the left and right eye, then # visualize each of the eyes leftEyeHull = cv2.convexHull(leftEye) rightEyeHull = cv2.convexHull(rightEye) # cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1) # cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1) # Frame empty cv2.drawContours(frame_empty, [leftEyeHull], -1, (0, 255, 0), 1) cv2.drawContours(frame_empty, [rightEyeHull], -1, (0, 255, 0), 1) # check to see if the 
eye aspect ratio is below the blink # threshold, and if so, increment the blink frame counter if ear < conf.EYE_AR_THRESH: COUNTER += 1 # otherwise, the eye aspect ratio is not below the blink # threshold else: # if the eyes were closed for a sufficient number of # then increment the total number of blinks if COUNTER >= conf.EYE_AR_CONSEC_FRAMES: TOTAL += 1 # reset the eye frame counter COUNTER = 0 # draw the total number of blinks on the frame along with # the computed eye aspect ratio for the frame # cv2.putText(frame, "Blinks: {}".format(TOTAL), (30, 60), # cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) # cv2.putText(frame, "EAR: {:.2f}".format(ear), (30, 90), # cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) # Frame empty cv2.putText(frame_empty, "Blinks: {}".format(TOTAL), (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2) cv2.putText(frame_empty, "EAR: {:.2f}".format(ear), (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2) # ******************************** # convert dlib's rectangle to a OpenCV-style bounding box # [i.e., (x, y, w, h)], then draw the face bounding box (x, y, w, h) = face_utils.rect_to_bb(rect) # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) # # show the face number # cv2.putText(frame, "Face #{}".format(i + 1), (30, 120), # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) # Frame empty cv2.rectangle(frame_empty, (x, y), (x + w, y + h), (0, 255, 0), 2) # show the face number cv2.putText(frame_empty, "Face #{}".format(i + 1), (30, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2) # loop over the (x, y)-coordinates for the facial landmarks # and draw them on the image for (x, y) in shape: # cv2.circle(frame, (x, y), 1, (0, 255, 255), -1) cv2.circle(frame_empty, (x, y), 1, (0, 255, 255), -1) # ********************************************************** if frame_got is False: break # If frame comes from webcam, flip it so it looks like a mirror. if conf.vedio_path == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (conf.CNN_INPUT_SIZE, conf.CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. # pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. 
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
            pose_estimator.draw_axes(frame_empty, steady_pose[0],
                                     steady_pose[1])
            print('steady pose vector: {} {}'.format(steady_pose[0],
                                                     steady_pose[1]))
        else:
            # cv2.putText(frame, "Signal loss", (200, 200),
            #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.putText(frame_empty, "Signal loss", (200, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # ******************************************************************
        # We send this frame to GazeTracking to analyze it
        gaze.refresh(frame)
        frame = gaze.annotated_frame()
        text = ""
        if gaze.is_blinking():
            text = "Blinking"
        elif gaze.is_right():
            text = "Looking right"
        elif gaze.is_left():
            text = "Looking left"
        elif gaze.is_center():
            text = "Looking center"
        cv2.putText(frame_empty, text, (250, 250),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 2)

        left_pupil = gaze.pupil_left_coords()
        right_pupil = gaze.pupil_right_coords()
        cv2.putText(frame_empty, "Left pupil: " + str(left_pupil),
                    (250, 280), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                    (147, 58, 31), 1)
        cv2.putText(frame_empty, "Right pupil: " + str(right_pupil),
                    (250, 310), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                    (147, 58, 31), 1)
        # ********************************************************************

        # show the frame
        # cv2.imshow("Frame", frame)
        cv2.imshow("Frame", frame_empty)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # do a bit of cleanup
    cv2.destroyAllWindows()
    cap.release()
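# --- Hedged helper sketch (not part of the original source) ---
# run() above calls eye_aspect_ratio() on the 6 landmark points of each eye,
# but the helper is defined elsewhere. A minimal sketch of the standard EAR
# computation (Soukupova & Cech) consistent with how it is used above:
from scipy.spatial import distance as dist


def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio from the 6 (x, y) eye landmarks."""
    # vertical eye distances
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    # horizontal eye distance
    C = dist.euclidean(eye[0], eye[3])
    # the ratio drops towards 0 when the eye closes
    return (A + B) / (2.0 * C)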
def process(input_key): if exists('pretrain/' + input_key): return # input_key = data['keys'] input_file = get_video_file(input_key) print('Input file', input_file) stream = cv2.VideoCapture(input_file) mark_detector = MarkDetector() pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] frame = None count = 0 frame_got = True target_folder = 'pretrain/{key}'.format(key=input_key) if not exists(target_folder): makedirs(target_folder) while frame_got: frame_got, frame = stream.read() if frame_got: # print('Frame', frame) facebox = mark_detector.extract_cnn_facebox(frame) # print('Facebox', facebox) if facebox: height, width = frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (input_size, input_size)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # print('Marks', marks) for mark in marks: cv2.circle(frame, tuple(mark), 3, (255, 0, 0)) # print('Marks Length', len(marks)) pose = pose_estimator.solve_pose_by_68_points(marks) # print('Pose', pose) pose = [pose[0].tolist(), pose[1].tolist()] with open( '{target_folder}/vecs.{count}.txt'.format( count=count, target_folder=target_folder), 'w') as f: f.write(json.dumps(pose)) with open( '{target_folder}/marks.{count}.txt'.format( count=count, target_folder=target_folder), 'w') as f: f.write(json.dumps(marks.tolist())) cv2.imwrite( '{target_folder}/image.{count}.png'.format( count=count, target_folder=target_folder), frame) count += 1
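# --- Hedged usage sketch (not part of the original source) ---
# process() above dumps, for every frame of a clip, a pose vector pair
# (vecs.N.txt), the 68 landmark coordinates (marks.N.txt) and the annotated
# frame (image.N.png) under pretrain/<key>/. A minimal sketch of how such a
# folder could be read back, assuming exactly the layout written above:
import glob
import json
from os.path import join


def load_pretrain_folder(target_folder):
    """Yield (pose, marks) pairs saved by process() for one video key."""
    for vec_path in sorted(glob.glob(join(target_folder, 'vecs.*.txt'))):
        count = vec_path.rsplit('.', 2)[1]          # the frame counter
        with open(vec_path) as f:
            pose = json.load(f)                     # [rotation, translation]
        with open(join(target_folder, 'marks.{}.txt'.format(count))) as f:
            marks = json.load(f)                    # 68 x 2 landmark list
        yield pose, marks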
def main(): #bagreader = BagFileReader(args.video, 640,480,848,480,30,30) bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15) # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() sample_frame = bagreader.get_color_frame() sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB) height, width, _ = sample_frame.shape fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps, (width, height)) # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: t1 = time.time() # Read frame, crop it, flip it, suits your needs. frame = bagreader.get_color_frame() frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if frame is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. # if video_src == 0: # frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. faceboxes = box_queue.get() print(faceboxes) mess = "Not detect pose" if faceboxes is not None: if isinstance(faceboxes[1], int): faceboxes = [faceboxes] for facebox in faceboxes: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. t2 = time.time() mess = round(1 / (t2 - t1), 2) # pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1]) cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2) # Show preview. cv2.imshow("Preview", frame) out.write(frame) if cv2.waitKey(1) & 0xFF == ord('q'): break out.release() # Clean up the multiprocessing process. 
    box_process.terminate()
    box_process.join()
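# --- Hedged helper sketch (not part of the original source) ---
# Several of the scripts in this collection spawn
# Process(target=get_face, args=(mark_detector, img_queue, box_queue)), but
# the worker itself is defined elsewhere. A minimal sketch consistent with how
# it is used here: it keeps pulling frames from img_queue and pushes one
# facebox (or None) per frame into box_queue. (The argparse variant near the
# end also passes a confidence threshold and returns a (box, confidence) pair.)
def get_face(detector, img_queue, box_queue):
    """Face-detection worker meant to run in a separate process or thread."""
    while True:
        image = img_queue.get()
        box = detector.extract_cnn_facebox(image)
        box_queue.put(box)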
def main(): instructions = 0 print "--- Intention classification for communication between a human and a robot ---" if instructions == 1: print "First you will be required to present a facial expression before you will do a head movement." print "If done correctly these gestures will be detected by the robot and it will perform the desired task." raw_input("Press Enter to continue...") if instructions == 1: print "This is the module for facial expression recognition." print "This program can detect the emotions: Happy and Angry." print "The program will look for the expression for 3 seconds." raw_input("To proceed to Facial Expression Recognition press Enter...") predictExp = 0 # Load Facial Expression Recognition trained model print "- Loading FER model... -" #faceExpModel = tf.keras.models.load_model("/home/bjornar/ML_models/FER/Good models(80+)/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5") faceExpModel = tf.keras.models.load_model( "/home/project/Bjorn/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5" ) # Load Face Cascade for Face Detection print "- Loading Face Cascade for Face Detection... -" #cascPath = "/home/bjornar/MScDissertation/TrainingData/FaceDetection/haarcascade_frontalface_default.xml" cascPath = "/home/project/Bjorn/IntentionClassification-Repository/haarcascade_frontalface_default.xml" faceCascade = cv2.CascadeClassifier(cascPath) ## Initializing Head Movement variables # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() sample_frame = cv2.imread("sample_frame.png") # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]] counter = 0 font = cv2.FONT_HERSHEY_SIMPLEX numXPoints = 0 numYPoints = 0 sumX = 0 sumY = 0 currentTime = 0 previousTime1 = 0 previousTime2 = 0 directionArray = [] moveSequence = [] moves = [] classifyMoves = 0 headPoseDirection = 'emtpy' if camera == 'kinect': ## Initialize Kinect camera print "Initializing camera..." try: from pylibfreenect2 import OpenGLPacketPipeline pipeline = OpenGLPacketPipeline() except: try: from pylibfreenect2 import OpenCLPacketPipeline pipeline = OpenCLPacketPipeline() except: from pylibfreenect2 import CpuPacketPipeline pipeline = CpuPacketPipeline() #print("Packet pipeline:", type(pipeline).__name__) # Create and set logger #logger = createConsoleLogger(LoggerLevel.Debug) setGlobalLogger() fn = Freenect2() num_devices = fn.enumerateDevices() if num_devices == 0: print("- No device connected! 
-") sys.exit(1) serial = fn.getDeviceSerialNumber(0) device = fn.openDevice(serial, pipeline=pipeline) listener = SyncMultiFrameListener(FrameType.Color | FrameType.Ir | FrameType.Depth) # Register listeners device.setColorFrameListener(listener) device.setIrAndDepthFrameListener(listener) device.start() # NOTE: must be called after device.start() registration = Registration(device.getIrCameraParams(), device.getColorCameraParams()) elif camera == 'webcam': #video_capture = cv2.VideoCapture(0) video_capture = cv2.VideoCapture(-1) elif camera == 'video': #video_capture = cv2.VideoCapture(0) video_capture = cv2.VideoCapture( "/home/project/Bjorn/SonyHandycamTest.mp4") ## Facial Expression Recognition variables FER_prediction = [] FERclass = '' FERstart = 0 classifyMoves = 0 ## Head movement variables predictHeadMov = 3 HeadMov = [] HMCclass = '' detectedFaces = [] mark = [] notDone = 0 progressStatus = [0, 0] while notDone in progressStatus: # This waits for each module to finsih if camera == 'kinect': frames = listener.waitForNewFrame() color = frames["color"] color = color.asarray() color = cv2.resize(color, (int(873), int(491))) color = color[0:480, 150:790] color = np.delete(color, np.s_[3::], 2) elif camera == 'webcam': # Capture frame-by-frame ret, color = video_capture.read() elif camera == 'video': # Capture frame-by-frame ret, color = video_capture.read() ## Detect facial expression predictExpNums = [1, 2] if predictExp == 0: while predictExp not in predictExpNums: predictExp = int( raw_input( "\nPress 1 to detect Facial Expression or press 2 to do Head Movement classification." )) if predictExp == 1: predictExp = 1 print "------ Facial Expression Recognition ------" elif predictExp == 2: predictHeadMov = 0 progressStatus[0] = 1 FERstart = time.time() elif predictExp == 1: currentTime = time.time() if currentTime - FERstart < 10: FER_prediction.append( facialExpressionRecogntion(color, faceExpModel, faceCascade)) else: predictExp = 2 FER_prediction = filter(None, FER_prediction) FERclass = mostCommon(FER_prediction) FERclass = FERclass[2:7] predictHeadMov = 0 if FERclass == '': print("Did not detect an expression, try again.") predictExp = 0 else: progressStatus[0] = 1 print "Detected expression: " + str(FERclass) print "Progress: FER DONE" # else: # #cv2.imwrite("sample_frame.png", color) # break ## Detect head movement class if predictHeadMov == 0: predictHeadMov = int( raw_input( "\nPress 1 to do Head Movement classification or 2 to skip." )) if predictHeadMov == 1: predictHeadMov = 1 print "------ Head Movement Classification ------" moveTime = int( raw_input( "How many seconds should I record your movement?")) #moveTime = 2 else: predictHeadMov = 2 timer = time.time() previousTime1 = timer previousTime2 = timer if predictHeadMov == 1: print color.shape color = color[0:480, 0:480] color = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY) cv2.imshow("", color) raw_input() print color.shape # Feed frame to image queue. img_queue.put(color) #pdb.set_trace() # Get face from box queue. facebox = box_queue.get() print color.shape if facebox is not None: # Detect landmarks from image of 128x128. face_img = color[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) #Convert the marks locations from local CNN to global image. 
marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(color, headPoseDirection, (20, 70), font, 1, (0, 255, 0), 4) # # Get average position of nose noseMarksTemp = [] noseMarksTemp.append(marks[30][0]) noseMarksTemp.append(marks[30][1]) noseMarks[0] = noseMarksTemp for i in range(9, 0, -1): noseMarks[i] = noseMarks[i - 1] # Get the direction of head movement headPoseDirection = calculateDirection(noseMarks) directionArray.append(headPoseDirection) if classifyMoves == 0: classifyMoves = 1 timer = time.time() previousTime1 = timer previousTime2 = timer currentTime = time.time() if currentTime - previousTime1 > moveTime and classifyMoves == 1: print "------------------------------------------------" print "Elapsed timer 1: " + str(currentTime - previousTime1) # Get most common direction HMCclass = mostCommon(directionArray) classifyMoves = 2 directionArray = [] previousTime1 = currentTime progressStatus[1] = 1 print "Progress: HMC DONE" else: print "Did not detect a face" elif predictHeadMov == 2: progressStatus[1] = 1 print "Skipped Head Movement Estimation." break # if notDone in progressStatus and predictHeadMov == 2 and predictExp == 2: # print "You seem to have skipped one or more tasks." # inpt = '' # while inpt == '': # inpt = raw_input("To do FER press 1 and to do HMC press 2...") # if input == '1': # predictExp = 1 # elif input == '2': # predictHeadMov cv2.imshow("", color) if camera == 'kinect': listener.release(frames) key = cv2.waitKey(delay=1) if key == ord('q'): break if camera == 'kinect': listener.release(frames) device.stop() device.close() elif camera == 'webcam' or camera == 'video': video_capture.release() cv2.destroyAllWindows() # Clean up the multiprocessing process. box_process.terminate() box_process.join() print "---------------- RESULT ----------------" if FERclass != '': print "Detected facial expression: " + str(FERclass) else: print "Did not detect any expression." if HMCclass != '': print "Detected head movement: " + str(HMCclass) else: print "Did not detect a head movement." print "----------------------------------------" intentionClassification = [FERclass, HMCclass] return intentionClassification
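# --- Hedged helper sketch (not part of the original source) ---
# Both the facial-expression branch and the head-movement branch above reduce
# a list of per-frame predictions with mostCommon(), which is not shown here.
# A minimal sketch assuming it simply returns the most frequent element:
from collections import Counter


def mostCommon(items):
    """Return the most frequent element of a non-empty sequence."""
    return Counter(items).most_common(1)[0][0]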
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) right_corner = tuple([int(i) for i in marks[36]]) left_corner = tuple([int(i) for i in marks[45]]) # print(marks[36], marks[45]) cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2) pixel_distance = int( math.sqrt((right_corner[0] - left_corner[0])**2 + (right_corner[1] - left_corner[1])**2)) estimated_distance = (real_width * focal_length) / pixel_distance cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. 
            print(steady_pose[1])
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            angles = process_eyes(frame, marks)
            if angles:
                # print(angles)
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
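# --- Hedged helper sketch (not part of the original source) ---
# The distance estimate above relies on module-level real_width and
# focal_length values that are defined elsewhere. The focal length is usually
# obtained once from a calibration frame taken at a known distance; a minimal
# sketch of that step, reusing the variable names assumed above:
def calibrate_focal_length(pixel_distance, known_distance, real_width):
    """Solve the pinhole relation f = (d_pixels * D_known) / W_real once,
    so that (real_width * focal_length) / pixel_distance later recovers D."""
    return (pixel_distance * known_distance) / real_width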
def headPoseEstimation(): print("HEAD POSE ESTIMATION...") """MAIN""" # Video source from webcam or video file. #video_src = 0 #video_src = 'EWSN.avi' #cam = cv2.VideoCapture(video_src) #_, sample_frame = cam.read() # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() print("1") # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() print("2") # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] print("3") noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]] counter = 0 font = cv2.FONT_HERSHEY_SIMPLEX numXPoints = 0 numYPoints = 0 sumX = 0 sumY = 0 # start = time.time() # previousTime1 = start # previousTime2 = start currentTime = 0 previousTime1 = 0 previousTime2 = 0 directionArray = [] moveSequence = [] moves = [] classifyMoves = 0 headPoseDirection = 'emtpy' while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break #print("4") #print(frame.shape) #print("5") # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. #if video_src == 0: # print("6") # frame = cv2.flip(frame, 2) # cv2.imwrite("Preview.png", frame) # print("7") # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) #print("8") # Get face from box queue. facebox = box_queue.get() #print(type(facebox) #print(facebox #print("9") if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) print(face_img.shape) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. 
marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # # Get average position of nose noseMarksTemp = [] noseMarksTemp.append(marks[30][0]) noseMarksTemp.append(marks[30][1]) noseMarks[0] = noseMarksTemp for i in range(9, 0, -1): noseMarks[i] = noseMarks[i - 1] # Get the direction of head movement headPoseDirection = calculateDirection(noseMarks) #if headPoseDirection != 'still': directionArray.append(headPoseDirection) #print(directionArray #print(len(directionArray) print("To capture a movement press 'a' and perform a movement.") #currentTime1 = time.time() if cv2.waitKey(5) == ord('a') and not classifyMoves: classifyMoves = 1 print("Start classifying movement...") timer = time.time() currentTime = timer previousTime1 = timer previousTime2 = timer if cv2.waitKey(5) == ord('b') and classifyMoves: classifyMoves = 0 print("Stopped classifying movement...") currentTime = time.time() if currentTime - previousTime1 > 2 and classifyMoves: print("------------------------------------------------") print("Elapsed timer 1: " + str(currentTime - previousTime1)) #print(len(directionArray) # Get most common direction moveClass = mostCommon(directionArray) #moveSequence.append(moveClass) print(moveClass) # Get a sequence of head movements # if currentTime - previousTime2 > 10 and classifyMoves == 1 and len(moves) == 0: # print("Elapsed timer 2: " + str(currentTime - previousTime2) # numMoves = len(moveSequence) # moves = moveSequence[(numMoves-5):(numMoves-1)] # print(moves # moveSequence = [] # previousTime2 = currentTime # classifyMoves = 0 classifyMoves = 0 directionArray = [] previousTime1 = currentTime print( "To continue type 'c' or to recapture a movement type 'a'." ) if cv2.waitKey(5) == ord('c'): break #print(previousTime # Uncomment following line to show raw marks. #mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. #pose_estimator.draw_annotation_box( # frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # if len(moves) > 1: # cv2.putText(frame, moves[0], (450,70), font, 1, (0,0,0), 4) # cv2.putText(frame, moves[1], (450,100), font, 1, (0,0,0), 4) # cv2.putText(frame, moves[2], (450,130), font, 1, (0,0,0), 4) # cv2.putText(frame, moves[3], (450,160), font, 1, (0,0,0), 4) cv2.putText(frame, headPoseDirection, (20, 70), font, 1, (0, 255, 0), 4) # Show preview. #cv2.namedWindow("", cv2.WINDOW_NORMAL) cv2.imshow("Preview", frame) #cv2.resizeWindow("preview", 5000,5000) if cv2.waitKey(5) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join() return moveClass
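# --- Hedged helper sketch (not part of the original source) ---
# calculateDirection() above classifies the head movement from the last ten
# nose-tip positions kept in noseMarks (index 0 is the newest). Its real
# implementation is not shown; this is only a plausible sketch that matches
# how it is called, with a purely hypothetical pixel threshold and an assumed
# mapping of displacement signs to labels:
def calculateDirection(noseMarks, threshold=10):
    """Compare the newest and oldest nose positions and name the movement."""
    dx = noseMarks[0][0] - noseMarks[-1][0]
    dy = noseMarks[0][1] - noseMarks[-1][1]
    if abs(dx) < threshold and abs(dy) < threshold:
        return 'still'
    if abs(dx) >= abs(dy):
        return 'right' if dx > 0 else 'left'
    return 'down' if dy > 0 else 'up'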
def main(): """MAIN""" cv2.namedWindow("Test") # Create a named window cv2.moveWindow("Test", 900, 600) # Move it to (40,30) screenWidth, screenHeight = pyautogui.size() st = 'Last command' cap = cv2.VideoCapture(0) cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Setting up process for listening to audio commands voice_command_queue = Q() stt_process = Thread(target=get_voice_command, args=(voice_command_queue, )) stt_process.setDaemon(True) stt_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(255, 128, 128)) # Uncomment following line to draw head axes on frame. 
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])
            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]
            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)
            # print(xpos, ypos)
            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():
                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'
                print(command)

            cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)

        # Shrink the preview to 30 percent of the original dimensions.
        scale_percent = 30
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)
        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
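# --- Hedged helper sketch (not part of the original source) ---
# The head-mouse main() above starts a daemon thread with
# Thread(target=get_voice_command, args=(voice_command_queue,)) and later reads
# phrases such as "click" or "right" from the queue. The actual implementation
# is not shown; a minimal sketch using the third-party speech_recognition
# package (an assumption -- any speech-to-text source that puts lowercase
# phrases on the queue would work the same way):
import speech_recognition as sr


def get_voice_command(command_queue):
    """Continuously listen on the default microphone and enqueue phrases."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)
        while True:
            audio = recognizer.listen(source)
            try:
                command_queue.put(recognizer.recognize_google(audio).lower())
            except sr.UnknownValueError:
                continue  # nothing intelligible was heard; keep listening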
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print("Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) if args.out != None: fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] tm = cv2.TickMeter() cnt = 0 input_path = args.input_path listdir = os.listdir(input_path) for v_name in listdir: v_path = os.path.join(input_path, v_name) cap = cv2.VideoCapture(v_path) while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks(face_img) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box( frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) # Show preview. # cv2.imshow("Preview", frame) # if cv2.waitKey(10) == 27: # break if args.out != None: output_movie.write(frame) else: cv2.imshow("Preview", frame) cnt = cnt + 1 if cnt % 100 == 0: print(str(cnt), flush=True) # Clean up the multiprocessing process. 
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
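# --- Hedged usage sketch (not part of the original source) ---
# The batch main() above expects a module-level args object with cam, video,
# out and input_path attributes; the parser itself is defined elsewhere. A
# minimal argument-parsing sketch consistent with those names:
import argparse

parser = argparse.ArgumentParser(description="Batch head-pose annotation")
parser.add_argument("--cam", type=int, default=None,
                    help="webcam index, e.g. 0")
parser.add_argument("--video", type=str, default=None,
                    help="path to a single video file")
parser.add_argument("--input_path", type=str, default=None,
                    help="folder whose videos are processed one by one")
parser.add_argument("--out", type=str, default=None,
                    help="optional path of the annotated output video")
args = parser.parse_args()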
def main(): """MAIN""" # Video source from webcam or video file. video_src = 0 cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. pose_estimator.draw_annotation_box( frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
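# --- Hedged helper sketch (not part of the original source) ---
# Every main() in this collection smooths the six pose scalars with
# Stabilizer(state_num=2, measure_num=1, ...) objects whose .update() and
# .state are used but never defined here. A minimal sketch of such a scalar
# stabilizer built on cv2.KalmanFilter (an assumption about the original
# implementation, shown only to make the update()/state usage concrete):
import cv2
import numpy as np


class Stabilizer:
    """One Kalman filter tracking a single scalar and its velocity."""

    def __init__(self, state_num=2, measure_num=1,
                 cov_process=0.1, cov_measure=0.1):
        self.filter = cv2.KalmanFilter(state_num, measure_num)
        self.filter.transitionMatrix = np.array([[1, 1], [0, 1]], np.float32)
        self.filter.measurementMatrix = np.array([[1, 0]], np.float32)
        self.filter.processNoiseCov = (
            np.eye(state_num, dtype=np.float32) * cov_process)
        self.filter.measurementNoiseCov = (
            np.eye(measure_num, dtype=np.float32) * cov_measure)
        self.filter.errorCovPost = np.eye(state_num, dtype=np.float32)
        self.state = np.zeros((state_num, 1), np.float32)

    def update(self, measurement):
        """Feed one scalar measurement and refresh the smoothed state."""
        self.filter.predict()
        self.state = self.filter.correct(
            np.array([[np.float32(measurement[0])]]))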
def main(): # construct the argument parse and parse the arguments ap = argparse.ArgumentParser() ap.add_argument("-m", "--draw-markers", action="store_true", default=False, help="") ap.add_argument("-c", "--draw-confidence", action="store_true", default=False, help="") ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9, help="") ap.add_argument("-p", "--draw-pose", action="store_false", default=True, help="") ap.add_argument("-u", "--draw-unstable", action="store_true", default=False, help="") ap.add_argument("-s", "--draw-segmented", action="store_true", default=False, help="") args = vars(ap.parse_args()) confidence_threshold = args["confidence_threshold"] """MAIN""" # Video source from webcam or video file. video_src = 0 cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) if isWindows(): thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue)) thread.daemon = True thread.start() else: box_process = Process(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. result = box_queue.get() if result is not None: if args["draw_confidence"]: mark_detector.face_detector.draw_result(frame, result) # unpack result facebox, confidence = result # fix facebox if needed if facebox[1] > facebox[3]: facebox[1] = 0 if facebox[0] > facebox[2]: facebox[0] = 0 # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # segment the image based on markers and facebox seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0]) if args["draw_segmented"]: mark_detector.draw_box(frame, seg.getSegmentBBs()) cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"]) if args["draw_markers"]: mark_detector.draw_marks( frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. 
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1],
                    color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
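# --- Hedged helper sketch (not part of the original source) ---
# The main() above switches between threading (Windows) and multiprocessing
# (everything else) via isWindows(), which is not defined in this file. A
# minimal sketch:
import platform


def isWindows():
    """Return True when running on a Windows host."""
    return platform.system() == "Windows"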
def cameraCap(self): self.uId = self.userId.get() if self.uId != '': if self.outputLabel != None: self.outputLabel.destroy() self.outputLabel = Label(self.framePic, text="Here We Start") self.outputLabel.config(font=("Courier", 44)) self.outputLabel.place(x=400, y=100) mark_detector = MarkDetector() name = self.uId directory = os.path.join(facepath, name) if not os.path.exists(facepath): os.makedirs(facepath, exist_ok='True') if not os.path.exists(directory): try: os.makedirs(directory, exist_ok='True') except OSError as e: if e.errno != errno.EEXIST: print('invalid student id or access denied') return poses = ['frontal', 'right', 'left', 'up', 'down'] file = 0 ret, sample_frame = self.cap.read() i = 0 count = 0 if ret == False: return # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] images_saved_per_pose = 0 number_of_images = 0 shape_predictor = dlib.shape_predictor( "shape_predictor_68_face_landmarks.dat") face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH) while i < 5: saveit = False # Read frame, crop it, flip it, suits your needs. ret, frame = self.cap.read() if ret is False: break if count % 5 != 0: # skip 5 frames count += 1 continue if images_saved_per_pose == IMAGE_PER_POSE: i += 1 images_saved_per_pose = 0 # If frame comes from webcam, flip it so it looks like a mirror. if file == 0: frame = cv2.flip(frame, 2) original_frame = frame.copy() frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. x1 = max(facebox[0] - 0, 0) x2 = min(facebox[2] + 0, width) y1 = max(facebox[1] - 0, 0) y2 = min(facebox[3] + 0, height) face = frame[y1:y2, x1:x2] face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # print(steady_pose[0][0]) # if steady_pose[0][0]>0.1: # print('right') # else: # if steady_pose[0][0]<-0.1: # print('left') # if steady_pose[0][1]>0.1: # print('down') # else: # if steady_pose[0][1]<-0.1: # print('up') # print(steady_pose[0]) if i == 0: if abs(steady_pose[0][0]) < ANGLE_THRESHOLD and abs( steady_pose[0][1]) < ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 1: if steady_pose[0][0] > ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 2: if steady_pose[0][0] < -ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 3: if steady_pose[0][1] < -ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 4: if steady_pose[0][1] > ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True # Show preview. 
if i >= 5: print('Thank you') if self.outputLabel != None: self.outputLabel.destroy() self.outputLabel = Label(self.framePic, text="Thanks") self.outputLabel.config(font=("Courier", 44)) self.outputLabel.place(x=400, y=100) break frame = cv2.putText( frame, poses[i] + ' : ' + str(images_saved_per_pose) + '/' + str(IMAGE_PER_POSE), (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 1, cv2.LINE_AA) frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2) frame = imutils.resize(frame, width=300, height=300) # OpenCV represents images in BGR order; however PIL # represents images in RGB order, so we need to swap # the channels, then convert to PIL and ImageTk format image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(image) image = ImageTk.PhotoImage(image) # if the panel is not None, we need to initialize it if self.panel is None: self.panel = Label(self.framePic, image=image) self.panel.image = image self.panel.pack(side=LEFT, padx=0, pady=0) print("Done") # otherwise, simply update the panel else: self.panel.configure(image=image) self.panel.image = image if saveit: face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align( original_frame, frame_gray, face) cv2.imwrite( os.path.join( directory, str(name) + '_' + str(number_of_images) + '.jpg'), face_aligned) print(images_saved_per_pose) number_of_images += 1 self.cap.release() else: if self.outputLabel != None: self.outputLabel.destroy() self.outputLabel = Label(self.framePic, text="Please Enter a Valid Id") self.outputLabel.config(font=("Courier", 44)) self.outputLabel.place(x=400, y=100)
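# --- Hedged configuration sketch (not part of the original source) ---
# cameraCap() above reads several module-level constants that are defined
# elsewhere in the application. The values below are placeholders chosen for
# illustration only (CNN_INPUT_SIZE matches the 128x128 landmark input used
# throughout this collection); tune the rest for the actual setup:
facepath = "face_db"      # root folder that holds one sub-folder per user id
CNN_INPUT_SIZE = 128      # input size of the landmark CNN
FACE_WIDTH = 160          # width passed to the imutils FaceAligner
IMAGE_PER_POSE = 10       # images captured for each of the five poses
ANGLE_THRESHOLD = 0.1     # rotation magnitude that still counts as "frontal"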