def main():
    model = ModelPipeline()
    webcam = WebcamCapture()
    base_fps = webcam.get(cv2.CAP_PROP_FPS)
    print('Base FPS:', base_fps)

    info_frame = crop(webcam.read())
    mp.Process(target=pull, args=(info_frame, base_fps)).start()

    context = zmq.Context()
    socket = context.socket(zmq.PUSH)
    socket.bind('tcp://*:5555')

    height, width, _ = info_frame.shape
    hand_data_keys = ['origin', 'joints', 'distX', 'distY', 'vert']
    fps_send = FPS('Send:')

    while True:
        frame_large = webcam.read_rgb()
        frame_large = crop(frame_large)
        frame_large_l = frame_large[:, :width // 2]
        frame_large_r = frame_large[:, width // 2:]
        frame_l = imresize(frame_large_l, (128, 128))
        frame_r = imresize(frame_large_r, (128, 128))

        # iv - intermediate values
        ivl, _ = model.process(np.flip(frame_l, axis=1))
        ivr, _ = model.process(frame_r)
        hand_data_l = calc_hand_data(ivl)
        hand_data_r = calc_hand_data(ivr)

        if hand_data_l is not None and hand_data_r is not None:
            socket.send_json(
                {
                    'dataL': dict(zip(hand_data_keys, hand_data_l)),
                    'dataR': dict(zip(hand_data_keys, hand_data_r)),
                    'frameWidth': frame_large.shape[1],
                    'frameHeight': frame_large.shape[0],
                },
                zmq.SNDMORE)
            socket.send(np.flip(frame_large, axis=0).tobytes())
            fps_send()
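
# A minimal sketch of the consumer end of the PUSH/PULL pair above. The real
# `pull` worker is defined elsewhere; this only illustrates the wire format
# implied by `main`: a JSON metadata part sent with zmq.SNDMORE, followed by
# the raw bytes of the vertically flipped RGB frame. The connect address, the
# uint8 dtype, and the `handle` callback are assumptions.
def pull_sketch():
    import numpy as np
    import zmq

    context = zmq.Context()
    socket = context.socket(zmq.PULL)
    socket.connect('tcp://localhost:5555')
    while True:
        meta = socket.recv_json()  # part 1: hand data and frame dimensions
        raw = socket.recv()        # part 2: frame bytes
        frame = np.frombuffer(raw, dtype=np.uint8).reshape(
            meta['frameHeight'], meta['frameWidth'], 3)
        frame = np.flip(frame, axis=0)  # undo the sender's vertical flip
        handle(meta['dataL'], meta['dataR'], frame)  # hypothetical downstream use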
def live_application(capture, output_dirpath):
    model = ModelPipeline()

    frame_index = 0
    mano_params = []
    measure_time = True
    while True:
        frame_large = capture.read()
        if frame_large is None:
            print(f'none frame {frame_index}')
            break

        frame = imresize(frame_large, (128, 128))

        if measure_time:
            # Benchmark: run the pipeline 1000 times and report the mean
            # latency of the network alone (t1) and of network plus the
            # MPII-to-MANO conversion (t2), discarding the first 10
            # iterations as warm-up.
            ends1 = []
            ends2 = []
            for i in range(1000):
                start = time.time()
                _, theta_mpii = model.process(frame)
                end1 = time.time()
                theta_mano = mpii_to_mano(theta_mpii)
                end2 = time.time()
                ends1.append(end1 - start)
                ends2.append(end2 - start)
            t1 = np.mean(ends1[10:])
            t2 = np.mean(ends2[10:])
            print(f't1: {t1 * 1000:.2f}ms, {1 / t1:.2f}hz')
            print(f't2: {t2 * 1000:.2f}ms, {1 / t2:.2f}hz')
            return
        else:
            _, theta_mpii = model.process(frame)
            theta_mano = mpii_to_mano(theta_mpii)
            mano_params.append(deepcopy(theta_mano.tolist()))
        frame_index += 1

    # Note: the parameters are serialized as JSON despite the `.pickle` extension.
    with open(osp.join(output_dirpath, f'{capture.side}.pickle'), 'w') as f:
        json.dump(mano_params, f)
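
# A generic version of the timing harness above, as a sketch: it times `fn`
# over `n` calls and discards the first `warmup` measurements, matching the
# `ends1[10:]` warm-up handling in `live_application`. The helper name and
# defaults are ours, not the project's.
import time
import numpy as np

def benchmark(fn, n=1000, warmup=10):
    times = []
    for _ in range(n):
        start = time.time()
        fn()
        times.append(time.time() - start)
    mean = np.mean(times[warmup:])
    print(f'{mean * 1000:.2f}ms, {1 / mean:.2f}hz')
    return mean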
class Processor:
    def __init__(self):
        self.model = ModelPipeline()
        self.flip_side = 'right'

    def process(self, img, side):
        frame = imresize(img, (128, 128))
        # Mirror frames from the flip side so both hands reach the network in
        # the orientation it was trained on.
        if side == self.flip_side:
            frame = np.flip(frame, axis=1)
        _, theta_mpii = self.model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)
        return theta_mano, frame
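
# Hypothetical usage of `Processor`: both hand crops go through one model,
# and the crop whose side matches `flip_side` is mirrored internally.
# `left_crop` and `right_crop` are assumed HxWx3 RGB arrays.
proc = Processor()
theta_left, _ = proc.process(left_crop, side='left')
theta_right, _ = proc.process(right_crop, side='right')  # mirrored internally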
def live_application(capture, output_dirpath):
    """
    Launch an application that reads from a webcam and estimates hand pose in
    real time.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Parameters
    ----------
    capture : object
        An object from `capture.py` to read the capture stream from.
    """
    ############ output visualization ############
    view_mat = axangle2mat([1, 0, 0], np.pi)  # align different coordinate systems
    window_size = 1080

    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
        o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(width=window_size + 1, height=window_size + 1,
                         window_name='Minimal Hand - output')
    viewer.add_geometry(mesh)

    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(window_size + 1, window_size + 1,
                                        config.CAM_FX, config.CAM_FY,
                                        window_size // 2, window_size // 2)
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    ############ input visualization ############
    pygame.init()
    display = pygame.display.set_mode((window_size, window_size))
    pygame.display.set_caption('Minimal Hand - input')

    ############ misc ############
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    clock = pygame.time.Clock()

    model = ModelPipeline()

    frame_index = 0
    mano_params = []
    while True:
        frame_large = capture.read()
        if frame_large is None:
            print(f'none frame {frame_index}')
            break

        frame = imresize(frame_large, (128, 128))

        _, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)
        mano_params.append(deepcopy(theta_mano.tolist()))

        v = hand_mesh.set_abs_quat(theta_mano)
        v *= 2  # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        viewer.update_geometry()
        viewer.poll_events()
        viewer.capture_screen_image(
            osp.join(output_dirpath, "%06d.jpg" % frame_index))

        display.blit(
            pygame.surfarray.make_surface(
                np.transpose(imresize(frame_large, (window_size, window_size)),
                             (1, 0, 2))),
            (0, 0))
        pygame.display.update()

        clock.tick(30)
        frame_index += 1

    # Note: the parameters are serialized as JSON despite the `.pickle` extension.
    with open(osp.join(output_dirpath, f'{capture.side}.pickle'), 'w') as f:
        json.dump(mano_params, f)
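
# A hypothetical entry point for the application above, assuming an
# `OpenCVCapture`-style class from `capture.py` that exposes the `side`
# attribute the final dump relies on:
if __name__ == '__main__':
    live_application(OpenCVCapture(), './output')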
mesh_smoother1 = OneEuroFilter(2.0, 0.0)
mesh_smoother2 = OneEuroFilter(2.0, 0.0)
cords_smoother1 = OneEuroFilter(2.0, 0.0)
cords_smoother2 = OneEuroFilter(2.0, 0.0)
model = ModelPipeline()

que = Queue()
quet1 = Queue()
quet2 = Queue()
# two identical worker threads drain the shared input queue
t11 = threading.Thread(target=Loop1, args=(que, quet1, quet2))
t12 = threading.Thread(target=Loop1, args=(que, quet1, quet2))
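
# `Loop1` is defined elsewhere; the following is only a plausible sketch of
# such a worker, draining the shared input queue and fanning results out to
# the two output queues. The shutdown sentinel and the (side, frame) tuple
# layout are assumptions, not the project's actual protocol.
def loop1_sketch(que, quet1, quet2):
    while True:
        item = que.get()
        if item is None:  # assumed shutdown sentinel
            break
        side, frame = item             # assumed (side, frame) tuples
        result = model.process(frame)  # shared ModelPipeline from above
        (quet1 if side == 'left' else quet2).put(result)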
def live_application(capture):
    """
    Launch an application that estimates hand pose in real time.

    This variant ignores the webcam `capture` object and instead reads a
    How2Sign clip from `input_video_path`, writing the rendered video, frames,
    and MANO parameters to the paths below.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Parameters
    ----------
    capture : object
        An object from `capture.py` to read the capture stream from (unused here).
    """
    ############ output visualization ############
    view_mat = axangle2mat([1, 0, 0], np.pi)  # align different coordinate systems
    window_size = 1080

    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
        o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(width=window_size + 1, height=window_size + 1,
                         window_name='Minimal Hand - output')
    viewer.add_geometry(mesh)

    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(window_size + 1, window_size + 1,
                                        config.CAM_FX, config.CAM_FY,
                                        window_size // 2, window_size // 2)
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    ############ input visualization ############
    pygame.init()
    display = pygame.display.set_mode((window_size, window_size))
    pygame.display.set_caption('Minimal Hand - input')

    ############ misc ############
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    clock = pygame.time.Clock()

    model = ModelPipeline()

    input_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
    output_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
    output_frames_folder = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_frames/CVmyQR31Dr4_5-3-rgb_front"
    output_pose_file = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose/CVmyQR31Dr4_5-3-rgb_front.pickle"
    os.makedirs(output_frames_folder, exist_ok=True)  # portable replacement for `os.system("mkdir ...")`

    cap = cv2.VideoCapture(input_video_path)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    writer = cv2.VideoWriter(output_video_path,
                             cv2.VideoWriter_fourcc(*'PIM1'), fps, (1081, 731))
    pose_data = []
    for i in range(length):
        ret, frame_large = cap.read()
        if not ret or frame_large is None:  # check before color conversion
            continue
        frame_large = cv2.cvtColor(frame_large, cv2.COLOR_BGR2RGB)

        # center-crop to a square
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        elif frame_large.shape[0] < frame_large.shape[1]:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        frame_large = np.flip(frame_large, axis=1).copy()
        frame = imresize(frame_large, (128, 128))

        _, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)
        pose_data.append(theta_mano)

        v = hand_mesh.set_abs_quat(theta_mano)
        v *= 2  # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some versions of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()
        viewer.poll_events()
        write_rendered_frame(writer, viewer, output_frames_folder, i)

        display.blit(
            pygame.surfarray.make_surface(
                np.transpose(imresize(frame_large, (window_size, window_size)),
                             (1, 0, 2))),
            (0, 0))
        pygame.display.update()

    with open(output_pose_file, "wb") as output:
        pickle.dump(pose_data, output)
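
# Reading the exported pose file back, for reference: `pose_data` is a list
# with one `mpii_to_mano` parameter array per decoded frame.
import pickle

with open(output_pose_file, 'rb') as f:
    pose_data = pickle.load(f)
print(f'{len(pose_data)} frames of MANO parameters')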
def live_application(capture):
    """
    Launch an application that reads from a webcam and estimates hand pose in
    real time.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Parameters
    ----------
    capture : object
        An object from `capture.py` to read the capture stream from.
    """
    view_mat = axangle2mat([1, 0, 0], np.pi)
    window_size_w = 640
    window_size_h = 480

    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
        o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(
        width=window_size_w + 1, height=window_size_h + 1,
        window_name='Minimal Hand - output'
    )
    viewer.add_geometry(mesh)

    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(
        window_size_w + 1, window_size_h + 1,
        config.CAM_FX, config.CAM_FY,
        window_size_w // 2, window_size_h // 2
    )
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    mesh_smoother = OneEuroFilter(4.0, 0.0)
    cords_smoother = OneEuroFilter(4.0, 0.0)
    model = ModelPipeline()

    while True:
        frame_large = capture.read()
        if frame_large is None:
            continue

        # center-crop to a square
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        else:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        if config.left is False:
            frame_large = np.flip(frame_large, axis=1).copy()
        frame = imresize(frame_large, (128, 128))

        cords, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)

        v = hand_mesh.set_abs_quat(theta_mano)
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some versions of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()
        viewer.poll_events()

        # smooth the predicted joints, map them into pixel space, drop the z
        # column, and mark each joint on the input frame
        cords = cords_smoother.process(cords)
        cords = cords * 150 + 200
        cords = np.delete(cords, 2, 1)
        frame_large = cv2.cvtColor(frame_large, cv2.COLOR_RGB2BGR)
        for x in cords:
            cv2.drawMarker(frame_large, (int(x[0]), int(x[1])), (0, 0, 0))

        cv2.imshow("Hand AI", frame_large)
        if keyboard.is_pressed("q"):
            cv2.destroyAllWindows()
            break
        cv2.waitKey(1)
        time.sleep(0.0015)
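
# Hypothetical extension of the marker overlay above: connect the projected
# 2D joints with line segments. The bone connectivity shown is an assumption
# (the thumb chain only, for illustration); the real joint ordering lives in
# the upstream kinematics code.
import cv2

def draw_bones(frame, joints2d, bones=((0, 1), (1, 2), (2, 3), (3, 4))):
    """Draw line segments between 2D joint pairs on `frame`, in place."""
    for a, b in bones:
        cv2.line(frame,
                 (int(joints2d[a][0]), int(joints2d[a][1])),
                 (int(joints2d[b][0]), int(joints2d[b][1])),
                 (0, 0, 0), 2)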
def __init__():
    # Module-level initializer (note: not a method; it takes no `self`).
    # Loads the hand-detector graph and the pose-estimation pipeline into
    # globals shared by the rest of the module.
    global detection_graph, sess, fingerDetector
    detection_graph, sess = detector_utils.load_inference_graph()
    fingerDetector = ModelPipeline()
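
# A hedged sketch of how these globals are typically consumed afterwards,
# assuming a `detect_objects(image, detection_graph, sess)` helper in
# `detector_utils` (as in common hand-tracking utility modules) that returns
# detection boxes and scores; the exact signature is an assumption.
def detect_hands_sketch(image_rgb):
    boxes, scores = detector_utils.detect_objects(image_rgb, detection_graph, sess)
    return boxes, scores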