def get_all_poses(self, image, mark_detector=None, face_detector=None,
                  draw=False, draw_marks=False):
    """Get all the head poses in an image."""
    CNN_INPUT_SIZE = 128
    print("Initializing detectors")

    # Lazily construct the detectors if the caller did not supply them.
    if mark_detector is None:
        from .mark_detector import MarkDetector
        mark_detector = MarkDetector()
    if face_detector is None:
        from .mark_detector import FaceDetector
        face_detector = FaceDetector()

    confidences, faceboxes = face_detector.get_faceboxes(image, threshold=0.2)

    poses = []
    print("{} FACEBOXES".format(len(faceboxes)))
    for facebox in faceboxes:
        # Skip boxes that fall partly outside the image.
        if min(facebox) < 0:
            continue

        # Detect landmarks from a 128x128 crop of the face.
        face_img = image[facebox[1]:facebox[3], facebox[0]:facebox[2]]
        if not face_img.shape[0] or not face_img.shape[1]:
            continue
        face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
        marks = mark_detector.detect_marks(face_img)

        # Convert the mark locations from the local CNN crop to the global image.
        marks *= (facebox[2] - facebox[0])
        marks[:, 0] += facebox[0]
        marks[:, 1] += facebox[1]

        if draw_marks:
            mark_detector.draw_marks(image, marks, color=(0, 0, 255))

        # Crop the detected face, convert it to grayscale and resize to 48x48.
        detected_face = image[facebox[1]:facebox[3], facebox[0]:facebox[2]]
        detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY)
        detected_face = cv2.resize(detected_face, (48, 48))

        # Try pose estimation with 68 points.
        pose = self.solve_pose_by_68_points(marks)
        poses.append((pose[0].copy(), pose[1].copy()))
        if draw:
            self.draw_annotation_box(image, pose[0], pose[1],
                                     color=(255, 128, 128))

    return poses
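# A hedged usage sketch (not from the original repo): it assumes
# get_all_poses() above is a method of the PoseEstimator class used elsewhere
# in this codebase, and that "face.jpg" is a placeholder image path.
if __name__ == "__main__":
    import cv2

    image = cv2.imread("face.jpg")  # hypothetical input image
    estimator = PoseEstimator(img_size=image.shape[:2])
    poses = estimator.get_all_poses(image, draw=True, draw_marks=True)
    print("Recovered {} head pose(s)".format(len(poses)))
    for rotation_vec, translation_vec in poses:
        # Each pose is a (rotation, translation) pair in solvePnP style.
        print(rotation_vec.ravel(), translation_vec.ravel())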
def webcam_main():
    # Assumes module-level imports of cv2, time and imutils, plus a
    # module-level CNN_INPUT_SIZE constant (128 elsewhere in this repo).
    print("Camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    time.sleep(2.0)
    mark_detector = MarkDetector()

    # Loop over the frames from the video stream.
    while True:
        _, frame = vs.read()
        start = cv2.getTickCount()

        frame = imutils.resize(frame, width=750, height=750)
        frame = cv2.flip(frame, 1)
        faceboxes = mark_detector.extract_cnn_facebox(frame)

        if faceboxes is not None:
            for facebox in faceboxes:
                # Detect landmarks from a grayscale crop of the face.
                face_img = frame[facebox[1]:facebox[3],
                                 facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
                face_img0 = face_img.reshape(1, CNN_INPUT_SIZE,
                                             CNN_INPUT_SIZE, 1)

                marks = mark_detector.detect_marks_keras(face_img0)

                # Convert the marks from the local crop to the global frame.
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Draw the predicted landmarks.
                mark_detector.draw_marks(frame, marks,
                                         color=(255, 255, 255), thick=2)

        fps_time = (cv2.getTickCount() - start) / cv2.getTickFrequency()
        cv2.putText(frame, '%.1ffps' % (1 / fps_time),
                    (frame.shape[1] - 65, 15),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 255, 0))

        # Show the frame.
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # If the `q` key was pressed, break from the loop.
        if key == ord("q"):
            break

    # Do a bit of cleanup. Note: cv2.VideoCapture has no stop() method;
    # release() is the correct call.
    cv2.destroyAllWindows()
    vs.release()
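# A minimal standalone sketch (with made-up numbers) of the local-to-global
# landmark mapping used in the loops above: the CNN returns marks normalized
# to the square face crop, so scaling by the box width and offsetting by the
# box origin places them in frame coordinates.
import numpy as np

facebox = [200, 100, 328, 228]                  # x1, y1, x2, y2 (128 px square)
marks = np.array([[0.50, 0.50], [0.25, 0.75]])  # normalized crop coordinates
marks *= facebox[2] - facebox[0]                # scale to crop pixels
marks[:, 0] += facebox[0]                       # shift x into the frame
marks[:, 1] += facebox[1]                       # shift y into the frame
print(marks)  # [[264. 164.] [232. 196.]]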
def main():
    """MAIN"""
    # Construct the argument parser and parse the arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="draw the detected facial landmarks")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="draw the face detector confidence")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="minimum face detection confidence")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="disable drawing of the stabilized pose box")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="draw the raw, unstabilized pose box")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="draw the segmented face regions")
    args = vars(ap.parse_args())
    confidence_threshold = args["confidence_threshold"]

    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Set up a worker and queues; Windows uses a thread, other platforms a process.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    if isWindows():
        thread = threading.Thread(target=get_face,
                                  args=(mark_detector, confidence_threshold,
                                        img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold,
                                    img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Use one frame to set up the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(state_num=2, measure_num=1,
                                   cov_process=0.1, cov_measure=0.1)
                        for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, to suit your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if the frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If the frame comes from the webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 1)

        # Pose estimation in 3 steps:
        # 1. detect the face;
        # 2. detect the landmarks;
        # 3. estimate the pose.

        # Feed the frame to the image queue.
        img_queue.put(frame)

        # Get the face from the box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)

            # Unpack the result.
            facebox, confidence = result

            # Fix the facebox if needed.
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0

            # Detect landmarks from a 128x128 crop of the face.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the mark locations from the local CNN crop to the global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Segment the image based on the markers and facebox.
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1],
                    color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
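# A minimal sketch of what the stabilizer loop above does to each pose
# channel: every Stabilizer is a small Kalman filter, so feeding it one noisy
# measurement per frame and reading back state[0] yields a smoothed value.
# (Standalone demo; assumes the Stabilizer class from this repo and
# synthetic noise.)
import numpy as np

stabilizer = Stabilizer(state_num=2, measure_num=1,
                        cov_process=0.1, cov_measure=0.1)
noisy_signal = 1.0 + 0.05 * np.random.randn(30)  # a jittery pose component
for value in noisy_signal:
    stabilizer.update([value])
    print(float(stabilizer.state[0]))  # settles near 1.0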
def webcam_main():
    # Assumes module-level IMAGE_PATH, current_model and CNN_INPUT_SIZE.
    print("Camera sensor warming up...")
    cv2.namedWindow('face landmarks', cv2.WINDOW_NORMAL)
    frame = cv2.imread(IMAGE_PATH)
    time.sleep(2.0)
    mark_detector = MarkDetector(current_model, CNN_INPUT_SIZE)

    # Select the inference backend from the model file extension.
    ext = current_model.split(".")[-1]
    if ext == "pb":
        run_model = 0
    elif ext in ("hdf5", "h5"):
        run_model = 1
    else:
        print("Input model format error!")
        return

    faceboxes = mark_detector.extract_cnn_facebox(frame)
    print(faceboxes)

    if faceboxes is not None:
        facebox = faceboxes[0]

        # Detect landmarks from a grayscale crop of the face.
        face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
        cv2.rectangle(frame, (facebox[0], facebox[1]),
                      (facebox[2], facebox[3]), (0, 255, 0), 2)
        face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
        face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1)

        if run_model == 1:
            marks = mark_detector.detect_marks_keras(face_img0)
        else:
            marks = mark_detector.detect_marks_tensor(face_img0, 'input_2:0',
                                                      'output/BiasAdd:0')

        # Convert the marks from the local crop to the global frame.
        marks *= facebox[2] - facebox[0]
        marks[:, 0] += facebox[0]
        marks[:, 1] += facebox[1]

        # Draw the predicted landmarks.
        mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2)

    # Display the annotated image until the `q` key is pressed.
    while True:
        cv2.imshow("face landmarks", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break

    # Do a bit of cleanup.
    cv2.destroyAllWindows()
def single_main():
    """MAIN"""
    cam = cv2.VideoCapture(VIDEO_PATH)
    _, sample_frame = cam.read()

    # Load the landmark detector.
    mark_detector = MarkDetector()

    # Set up the process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(mark_detector, img_queue, box_queue))
    box_process.start()

    while True:
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        img_queue.put(frame)
        start = cv2.getTickCount()

        # Get faces from the box queue.
        faceboxes = box_queue.get()
        if faceboxes is not None:
            for facebox in faceboxes:
                # Detect landmarks from a grayscale crop of the face.
                face_img = frame[facebox[1]:facebox[3],
                                 facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
                face_img0 = face_img.reshape(1, CNN_INPUT_SIZE,
                                             CNN_INPUT_SIZE, 1)

                # Landmark predictor.
                marks = mark_detector.detect_marks_keras(face_img0)

                # Convert the marks from the local crop to the global frame.
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Draw the predicted landmarks.
                mark_detector.draw_marks(frame, marks, color=(255, 255, 255))

        fps_time = (cv2.getTickCount() - start) / cv2.getTickFrequency()
        cv2.putText(frame, '%.1ffps' % (1 / fps_time),
                    (frame.shape[1] - 65, 15),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (127, 127, 127))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == ord('q'):
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cam.release()
    cv2.destroyAllWindows()
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) right_corner = tuple([int(i) for i in marks[36]]) left_corner = tuple([int(i) for i in marks[45]]) # print(marks[36], marks[45]) cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2) pixel_distance = int( math.sqrt((right_corner[0] - left_corner[0])**2 + (right_corner[1] - left_corner[1])**2)) estimated_distance = (real_width * focal_length) / pixel_distance cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. 
            print(steady_pose[1])
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Re-draw the axes rotated by the estimated eye angles, shifted
            # so both sets of axes stay visible.
            angles = process_eyes(frame, marks)
            if angles:
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
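# The distance readout above relies on the pinhole-camera relation
# distance = (real_width * focal_length) / pixel_width. A hedged standalone
# helper with illustrative numbers; real_width_cm (the physical eye-corner
# span) and focal_length_px are assumptions to calibrate per camera.
def estimate_distance(pixel_width, real_width_cm=9.0, focal_length_px=700.0):
    """Approximate camera-to-face distance in cm from an on-screen width."""
    return (real_width_cm * focal_length_px) / pixel_width

print(estimate_distance(100))  # ~63 cm when the eye corners span 100 px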
def main(): """MAIN""" # Video source from webcam or video file. video_src = 0 cam = cv2.VideoCapture(video_src) #cam.set(3, 640) #cam.set(4, 480) sF = 1.05 _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) img_queue.put(sample_frame) thread = threading.Thread(target=get_face, args=(mark_detector, img_queue, box_queue)) thread.daemon = True thread.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points.f pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. pose_estimator.draw_annotation_box(facebox, frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) ret, img = cap.read() # gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) # blur = cv2.GaussianBlur(gray,(5,5),0) # thresh = cv2.adaptiveThreshold(blur,255,1,1,11,2) x = width / 2 + 275 y = height / 2 + 275 text_color = (255, 0, 0) cv2.imshow("input", cv2.resize(frame, (800, 600))) # Show preview. if cv2.waitKey(10) == 27: break
class PoseHeadNode(object):

    def __init__(self):
        # Subscribe.
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # Publish.
        self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo,
                                                 queue_size=10)

        # Data.
        self.humans = None
        self.color_img = None
        self.init()

    def init(self):
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(state_num=2, measure_num=1,
                                            cov_process=0.1, cov_measure=0.1)
                                 for _ in range(6)]
        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):
        if self.color_img is None:
            return
        h, w = self.color_img.shape[:2]

        # Convert the ROS topic to Human instances.
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x,
                                body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part
            humans.append(human)
        self.humans = humans

    def draw_pose(self, img, humans, parts=[4, 7]):
        if img is None or humans is None:
            return None
        image_h, image_w, _ = img.shape
        if parts is None or len(parts) == 0:
            return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        for human in humans:
            for part in parts:
                if part in human.body_parts.keys():
                    body_part = human.body_parts[part]
                    coord = (int(body_part.x * image_w + 0.5),
                             int(body_part.y * image_h + 0.5))
                    img = cv2.circle(img, coord, 2, (0, 255, 0))
        return img

    def color_callback(self, data):
        # Note: a semaphore should protect self.color_img, because it is
        # also used by the is_waving_hand function.
        decoded = np.frombuffer(data.data, np.uint8)
        img = np.reshape(decoded, (480, 640, 3))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def get_top_vertice_human(self, human, img_w, img_h):
        sorted_id = sorted(human["pose"].body_parts.keys())
        body_part = human["pose"].body_parts[sorted_id[0]]
        # Fixed: the original referenced undefined image_w/image_h here.
        return (int(body_part.x * img_w + 0.5),
                int(body_part.y * img_h + 0.5))

    def is_gazing(self, face_direction, human, img):
        def get_angle_direct_with_peakhand(face_direction, peak):
            # Cosine between the face-direction vector and the vector from
            # the face to the hand peak.
            d0, d1 = face_direction
            peak_direction = (peak[0] - d0[0], peak[1] - d0[1])
            face_direct = (d1[0] - d0[0], d1[1] - d0[1])
            d_peak_direction = np.sqrt(peak_direction[0]**2 +
                                       peak_direction[1]**2)
            d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2)
            return (peak_direction[0] * face_direct[0] +
                    peak_direction[1] * face_direct[1]) / (d_peak_direction *
                                                           d_face_direct)

        thresh = 0.1  # previously 0.5, i.e. cos(PI/3)
        image_h, image_w, _ = img.shape
        # Parts 4 and 7 are the wrist keypoints.
        if 4 in human.body_parts.keys():
            body_part = human.body_parts[4]
            coord = (int(body_part.x * image_w + 0.5),
                     int(body_part.y * image_h + 0.5))
            print("angle left = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction, coord) > thresh
        if 7 in human.body_parts.keys():
            body_part = human.body_parts[7]
            coord = (int(body_part.x * image_w + 0.5),
                     int(body_part.y * image_h + 0.5))
            print("angle right = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction, coord) > thresh
        return False

    def get_gazing_status(self, face_directions, face_coords, humans, img):
        def get_dist_to_humans(human_coords, coord):
            # Fixed: the original closed the sqrt after the first squared
            # term, so only part of the distance was under the root.
            result = []
            for _coord in human_coords:
                result.append(np.sqrt((coord[0] - _coord[0])**2 +
                                      (coord[1] - _coord[1])**2))
            return result

        image_h, image_w, _ = img.shape
        gazing_status = []

        # Use each human's top-most sorted body part as its anchor coordinate.
        human_coords = []
        for human in humans:
            sorted_id = sorted(human.body_parts.keys())
            body_part = human.body_parts[sorted_id[0]]
            coord = (int(body_part.x * image_w + 0.5),
                     int(body_part.y * image_h + 0.5))
            human_coords.append(coord)

        # Match each face to its nearest human and test the gaze direction.
        for fdirection, fcoord in zip(face_directions, face_coords):
            dists = get_dist_to_humans(human_coords, fcoord)
            idx = np.argmin(dists)
            gazing_status.append(self.is_gazing(fdirection, humans[idx], img))
            # Remove the matched human so it is not matched twice. Fixed:
            # the original pruned only human_coords, desynchronizing the two
            # lists on later iterations.
            del human_coords[idx]
            del humans[idx]
        return gazing_status

    def publish_headpose_info(self, viz=True):
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None or self.humans is None:
            print("Color img None")
            return

        # Work out the detector scales from the image size.
        im_shape = self.color_img.shape
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)
        t1 = time.time()
        faceboxes = self.mark_detector.extract_cnn_facebox(frame)
        mess = "Not detect pose"

        face_directions = []
        face_coords = []
        if faceboxes is not None and len(faceboxes) > 0:
            # A single box may come back as a flat list; wrap it.
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from a 128x128 crop of the face.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_coords.append((int((facebox[0] + facebox[2]) / 2),
                                    int((facebox[1] + facebox[3]) / 2)))
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = self.mark_detector.detect_marks([face_img])

                # Convert the mark locations from the local CNN crop to the
                # global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment the following line to show the facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw the pose annotation; it also returns the face direction.
                face_direction = self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
                face_directions.append(face_direction)

                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # Uncomment the following lines to draw the stable pose annotation.
                # self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
                # Uncomment the following line to draw head axes on frame.
                # self.pose_estimator.draw_axes(
                #     frame, steady_pose[0], steady_pose[1])

        # Match faces to humans and publish the gazing status.
        gazing_status = self.get_gazing_status(face_directions, face_coords,
                                               copy.deepcopy(self.humans),
                                               self.color_img)
        msg = GazingInfo()
        msg.is_gazing = []
        msg.coords = []
        for coord, gazestatus in zip(face_coords, gazing_status):
            _coord = Coord2D()
            _coord.x = coord[0]
            _coord.y = coord[1]
            msg.coords.append(_coord)
            msg.is_gazing.append(gazestatus)
        self.pub_headpose_info.publish(msg)
        print(gazing_status)

        cv2.putText(frame, "FPS: {}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2)

        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
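# The is_gazing() test above boils down to a cosine similarity: it compares
# the 2D face-direction segment returned by draw_annotation_box with the
# vector from the face to a wrist keypoint, and reports "gazing" when the
# cosine exceeds the threshold. A self-contained sketch with made-up
# coordinates:
import numpy as np

def gaze_cosine(face_direction, peak):
    d0, d1 = np.array(face_direction[0]), np.array(face_direction[1])
    to_peak = np.array(peak) - d0  # face -> hand peak
    face_vec = d1 - d0             # face-direction segment
    return float(np.dot(to_peak, face_vec) /
                 (np.linalg.norm(to_peak) * np.linalg.norm(face_vec)))

print(gaze_cosine(((320, 240), (340, 250)), (420, 290)) > 0.1)  # True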
class PoseHeadNode(object):

    def __init__(self):
        # Subscribe.
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # Publish.
        # self.pub_headpose_info = rospy.Publisher("/headpose_info",
        #                                          HeadPoseInfo, queue_size=10)

        # Data.
        self.humans = None
        self.color_img = None
        self.init()

    def init(self):
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(state_num=2, measure_num=1,
                                            cov_process=0.1, cov_measure=0.1)
                                 for _ in range(6)]
        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):
        if self.color_img is None:
            return
        h, w = self.color_img.shape[:2]

        # Convert the ROS topic to Human instances.
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x,
                                body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part
            humans.append(human)
        self.humans = humans

    def draw_pose(self, img, humans):
        if img is None or humans is None:
            return None
        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)

    def color_callback(self, data):
        # Note: a semaphore should protect self.color_img, because it is
        # also used by the is_waving_hand function.
        decoded = np.frombuffer(data.data, np.uint8)
        img = np.reshape(decoded, (480, 640, 3))
        print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def publish_headpose_info(self, viz=True):
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None:
            print("Color img None")
            return

        # Work out the detector scales from the image size.
        im_shape = self.color_img.shape
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)
        t1 = time.time()
        faceboxes, landmark = self.detector.detect(frame, thresh,
                                                   scales=scales,
                                                   do_flip=False)
        mess = "Not detect pose"

        if faceboxes is not None and len(faceboxes) > 0:
            for facebox in faceboxes:
                # np.int was removed from NumPy; plain int behaves the same here.
                facebox = facebox.astype(int)

                # Detect landmarks from a 128x128 crop of the face.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = self.mark_detector.detect_marks([face_img])

                # Convert the mark locations from the local CNN crop to the
                # global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment the following line to show the facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw the pose annotation on the frame.
                self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # Uncomment the following lines to draw the stable pose annotation.
                # self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
                # Uncomment the following line to draw head axes on frame.
                # self.pose_estimator.draw_axes(
                #     frame, steady_pose[0], steady_pose[1])

        cv2.putText(frame, "FPS: {}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2)

        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)