import cv2
import numpy as np

# Project-specific names assumed to be imported/defined elsewhere in this module:
# InferenceEnginePyTorch, parse_poses, draw_poses, body_kp_name_to_id, pairs_spine.


class PoseWrapper:

    @staticmethod
    def distance_kps(kp1, kp2):
        # kp1 and kp2: numpy arrays of shape (3,): [x, y, conf]
        x1, y1, c1 = kp1
        x2, y2, c2 = kp2
        if c1 > 0 and c2 > 0:
            return np.linalg.norm(kp1[:2] - kp2[:2])
        else:
            return 0

    def __init__(self, draw_render=False):
        self.draw_render = draw_render
        self.net = InferenceEnginePyTorch('human-pose-estimation-3d.pth', 'GPU')

    def eval(self, frame):
        self.frame = frame
        base_height = 256
        scale = base_height / self.frame.shape[0]
        scaled_img = cv2.resize(self.frame, dsize=None, fx=scale, fy=scale)
        inference_result = self.net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, scale, 8, 1)
        if self.draw_render:
            draw_poses(self.frame, poses_2d)
        if poses_2d.shape[0] != 0:  # When no person is detected, shape = (), else (nb_persons, 25, 3)
            self.body_kps = np.array([
                np.array(poses_2d[pose_id][0:-1]).reshape((-1, 3))
                for pose_id in range(len(poses_2d))
            ])
            # Sort persons by an "estimation" of their size.
            # This size has little to do with the real size of a person; it is an arbitrary
            # value, calculated here as distance(Nose, Neck) + 0.33 * distance(Neck, MidHip).
            sizes = np.array([
                self.length(pairs_spine, person_idx=i, coefs=[1, 0.33])
                for i in range(self.body_kps.shape[0])
            ])
            # Sort from biggest size to smallest
            order = np.argsort(-sizes)
            sizes = sizes[order]
            self.body_kps = self.body_kps[order]
            # Keep only the biggest person
            self.body_kps = self.body_kps[0]
            self.nb_persons = 1
        else:
            self.nb_persons = 0
            self.body_kps = []
        return self.nb_persons, self.body_kps

    def get_body_kp(self, kp_name="Neck"):
        """
        Return the (x, y) coordinates of the keypoint named 'kp_name' of the detected person,
        or None if the keypoint name is invalid or the keypoint was not detected.
        """
        try:
            x, y, conf = self.body_kps[body_kp_name_to_id[kp_name]]
        except (KeyError, IndexError):
            print(f"get_body_kp: invalid kp_name '{kp_name}' or no keypoints available")
            return None
        if conf > 0:
            return (int(x), int(y))
        else:
            return None

    def length(self, pairs, person_idx=0, coefs=None):
        """
        Calculate the weighted mean length of the pairs in the list 'pairs' for the person
        of index 'person_idx' (from 0).
        If one (or both) of the two points of a pair is missing, the number of pairs used
        to compute the average is decremented by 1.
        """
        if coefs is None:
            coefs = [1] * len(pairs)
        person = self.body_kps[person_idx]
        l_cum = 0
        n = 0
        for i, pair in enumerate(pairs):
            dist = self.distance_kps(person[body_kp_name_to_id[pair.p1]],
                                     person[body_kp_name_to_id[pair.p2]])
            if dist != 0:
                l_cum += dist * coefs[i]
                n += 1
        if n > 0:
            return l_cum / n
        else:
            return 0
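
# Minimal usage sketch (not part of the original class): it assumes a webcam at
# index 0 and that the model path hard-coded in PoseWrapper.__init__ above is
# valid on this machine.
if __name__ == '__main__':
    pose = PoseWrapper(draw_render=True)
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        nb_persons, body_kps = pose.eval(frame)
        if nb_persons:
            neck = pose.get_body_kp("Neck")  # (x, y) pixel coordinates, or None
            if neck is not None:
                cv2.circle(frame, neck, 6, (0, 255, 0), -1)
        cv2.imshow("PoseWrapper", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()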

# Assumes module-level imports of os, json, cv2 and numpy as np, plus the demo's
# Plotter3d, parse_poses, rotate_poses, draw_poses, ImageReader and VideoReader,
# and the project-specific SocketServer and JointAngleCalculator.
def run_inference(args):
    from modules.inference_engine_pytorch import InferenceEnginePyTorch

    socket_server = SocketServer(args.port)
    joint_angle_calculator = JointAngleCalculator()

    stride = 8
    model_path = os.path.join('models', 'human-pose-estimation-3d.pth')
    net = InferenceEnginePyTorch(model_path, "GPU")

    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas 3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    file_path = None
    if file_path is None:
        file_path = os.path.join('data', 'extrinsics.json')
    with open(file_path, 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    frame_provider = ImageReader(args.images)
    is_video = False
    if args.video != '':
        frame_provider = VideoReader(args.video)
        is_video = True
    base_height = args.height_size
    fx = 1  # focal length; the 'unknown focal length' fallback below only triggers for negative values

    delay = 1
    esc_code = 27
    p_code = 112
    space_code = 32
    mean_time = 0
    for frame in frame_provider:
        current_time = cv2.getTickCount()
        if frame is None:
            break
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
        # Crop width to a multiple of the network stride (padding would be better, but is cut out for the demo)
        scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]
        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx, is_video)
        edges = []
        if len(poses_3d):
            # Rotate camera-space poses into world space and re-orient the axes for plotting
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES
                     + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))
        plotter.plot(canvas_3d, poses_3d, edges)
        cv2.imshow(canvas_3d_window_name, canvas_3d)

        draw_poses(frame, poses_2d)
        current_time = (cv2.getTickCount() - current_time) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = current_time
        else:
            mean_time = mean_time * 0.95 + current_time * 0.05
        cv2.putText(frame, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
                    (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))
        cv2.imshow('ICV 3D Human Pose Estimation', frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            if delay == 1:
                delay = 0
            else:
                delay = 1
        if delay == 0 or not is_video:  # allow to rotate 3D canvas while on pause
            key = 0
            while (key != p_code
                   and key != esc_code
                   and key != space_code):
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            else:
                delay = 1

        joint_angles = joint_angle_calculator.calculate_angles(poses_3d)
        if joint_angles:
            socket_server.send_data(joint_angles)
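
# Hypothetical entry point (not in the original source) showing the argument
# names that run_inference() reads: port, images, video and height_size.
# The default values below are illustrative assumptions only.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='3D human pose estimation with joint-angle streaming')
    parser.add_argument('--port', type=int, default=5000, help='socket server port')
    parser.add_argument('--images', nargs='+', default='', help='path(s) to input image(s)')
    parser.add_argument('--video', type=str, default='', help='path to a video file or camera id')
    parser.add_argument('--height-size', type=int, default=256, help='network input height')
    run_inference(parser.parse_args())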

# Assumes module-level imports of os, json, cv2 and numpy as np, plus
# InferenceEnginePyTorch, parse_poses and a PyPATH base-directory constant
# defined elsewhere in this script.
def pose3d():
    stride = 8
    net = InferenceEnginePyTorch(
        os.path.join(PyPATH, 'model', 'human-pose-estimation-3d.pth'), 'GPU')

    with open(os.path.join(PyPATH, 'parameters', 'extrinsics.json'), 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    cap = cv2.VideoCapture(1)
    if not cap.isOpened():
        print("Webcam not recognized")

    base_height = 256

    while True:
        ret, frame = cap.read()
        if not ret:
            continue

        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
        # scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # better to pad, but cut out for demo
        fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx)

        if len(poses_2d):
            poses_2d_copy = poses_2d.copy()
            x = poses_2d_copy[:, 0::3]
            y = poses_2d_copy[:, 1::3]
            # 2D keypoint indices (first person):
            # 0 - chest           1 - nose           2 - unused
            # 3 - left shoulder   4 - left elbow     5 - left wrist
            # 6 - left hip        7 - left knee      8 - left foot
            # 9 - right shoulder  10 - right elbow   11 - right wrist
            # 12 - right hip      13 - right knee    14 - right foot
            # Draw the left knee (index 7) of the first detected person;
            # a loop over all keypoints is sketched in draw_all_keypoints() below.
            frame = cv2.circle(frame, (int(x[0][7]), int(y[0][7])), 10, (255, 0, 0))
            print("x")
            print(x[0][7])
            print("y")
            print(y[0][7])

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        if len(poses_3d):
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
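
# Standalone sketch (not part of pose3d above) of the keypoint-drawing loop left
# commented out in the original: draw every detected 2D keypoint of the first
# person instead of only the left knee. Assumes the confidence channel (every
# third value of a 2D pose) is negative for undetected keypoints.
def draw_all_keypoints(frame, poses_2d):
    xs = poses_2d[:, 0::3]
    ys = poses_2d[:, 1::3]
    confs = poses_2d[:, 2::3]
    for i in range(xs.shape[1]):
        if confs[0][i] > 0:  # skip keypoints the network did not detect
            frame = cv2.circle(frame, (int(xs[0][i]), int(ys[0][i])), 10, (255, 0, 0))
    return frame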

import json
import traceback

import cv2
import numpy as np

# Plotter3d, parse_poses, rotate_poses and draw_poses are assumed to be imported
# from the demo's modules package elsewhere in this file.


class InferCtrl:
    net = None
    extrinsics = None

    def __init__(self,
                 model,
                 height=256,
                 device='CPU',
                 openvino=False,
                 tensorrt=False,
                 extrinsics_path="./data/extrinsics.json",
                 fx=1,
                 canvas_shape=(720, 1280, 3)) -> None:
        if openvino:
            from modules.inference_engine_openvino import InferenceEngineOpenVINO
            self.net = InferenceEngineOpenVINO(model, device)
        else:
            from modules.inference_engine_pytorch import InferenceEnginePyTorch
            self.net = InferenceEnginePyTorch(model, device, tensorrt)

        # Fall back to the default extrinsics file if the supplied path cannot be read
        try:
            with open(extrinsics_path, 'r') as f:
                self.extrinsics = json.load(f)
        except Exception:
            with open("./data/extrinsics.json", 'r') as f:
                self.extrinsics = json.load(f)
            traceback.print_exc()

        self.base_height = height
        self.fx = fx
        self.canvas_3d = np.zeros(canvas_shape, dtype=np.uint8)
        self.plotter = Plotter3d(self.canvas_3d.shape[:2])
        print("[INFO] canvas shape {}".format(self.canvas_3d.shape))

    def process_frame(self, frame, inference_result, merged=False):
        poses_3d = inference_result.get("pose_3d", {}).get("value", [])
        poses_2d = inference_result.get("pose_2d", {}).get("value", [])
        edges = inference_result.get("edges", {}).get("value", [])

        self.plotter.plot(self.canvas_3d, poses_3d, edges)
        draw_poses(frame, poses_2d)

        if merged:
            # Stack the annotated frame and the 3D canvas side by side at a common width
            frame_side = np.copy(self.canvas_3d)
            new_w = min(frame.shape[1], frame_side.shape[1])
            rel_h_f = int(new_w * frame.shape[0] * 1.0 / frame.shape[1])
            rel_h_s = int(new_w * frame_side.shape[0] * 1.0 / frame_side.shape[1])
            frame = cv2.resize(frame, (new_w, rel_h_f))
            frame_side = cv2.resize(frame_side, (new_w, rel_h_s))
            return np.hstack([frame, frame_side])

        return frame, np.copy(self.canvas_3d)

    def infer(self, frame, is_video=True, fx=None):
        stride = 8
        output = {}

        if fx is None:
            fx = self.fx
            output["focal_length"] = {
                "value": fx,
                "comment": "default value used because none was supplied"
            }

        R = np.array(self.extrinsics['R'], dtype=np.float32)
        t = np.array(self.extrinsics['t'], dtype=np.float32)

        input_scale = self.base_height / frame.shape[0]
        scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
        # Crop width to a multiple of the network stride (padding would be preferable)
        scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]
        # scaled_img = pad_resize_image(scaled_img, (scaled_img.shape[0], (scaled_img.shape[1] + stride) // stride, scaled_img.shape[2]))
        output["input_size"] = {
            "value": scaled_img.shape,
            "comment": "network input size"
        }

        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])
            output["focal_length"] = {
                "value": fx,
                "comment": "Focal length is unknown, 0.8 * frame width used"
            }

        # the inference
        inference_result = self.net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx, is_video)
        edges = []
        if len(poses_3d):
            # Rotate camera-space poses into world space and re-orient the axes for plotting
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape(
                (-1, 1, 1))).reshape((-1, 2))

        output["pose_3d"] = {
            "value": poses_3d,
            "comment": "re-oriented 3D poses"
        }
        output["pose_2d"] = {"value": poses_2d, "comment": "2D poses"}
        output["edges"] = {"value": edges, "comment": "skeleton edges for the 3D plot"}

        return output
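
# Minimal usage sketch (not from the original source): the model path, webcam
# index and window name below are illustrative assumptions.
if __name__ == '__main__':
    ctrl = InferCtrl('models/human-pose-estimation-3d.pth', device='CPU')
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        result = ctrl.infer(frame, is_video=True)
        merged_view = ctrl.process_frame(frame, result, merged=True)
        cv2.imshow('InferCtrl', merged_view)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()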