def convertData(gesture):
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load models
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)

    dataset = buildGestureDict("dataset/")
    gesturedf = pd.read_csv("sample.csv")

    # sentinel used when a hand is not detected: 21 dummy keypoints
    MISSING_HAND = [[1000, 1000]] * 21

    for video in dataset[gesture]["videos"]:
        print("Currently processing the video for " + video["filename"])
        startvideo = time.time()
        cap = cv2.VideoCapture(video["filepath"])
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        amount_of_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("Amount of Frames:", amount_of_frames)
        cap.set(cv2.CAP_PROP_FPS, 5)
        ret, img = cap.read()
        counter = 1
        df = pd.DataFrame(columns=["Head", "Left", "Right"])  # unused in this version
        frame_tracker = int(amount_of_frames / 12)  # sample 12 frames per video
        framecounter = 0
        left = 0
        right = 0
        while ret:
            ret, img = cap.read()  # get video frame
            if not ret:
                print("Failed to capture image")
                break
            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(img, 0.6,
                                      draw_person_pose(img, person_pose_array), 0.4, 0)
            if counter % frame_tracker == 0:
                # process only the first detected person
                firstPerson = True
                for person_pose in person_pose_array:
                    if not firstPerson:
                        continue

                    unit_length = pose_detector.get_unit_length(person_pose)

                    # hands estimation
                    hands = pose_detector.crop_hands(img, person_pose, unit_length)
                    if hands["left"] is not None:
                        hand_img = hands["left"]["img"]
                        bbox = hands["left"]["bbox"]
                        hand_keypoints = hand_detector(hand_img, hand_type="left")
                        for x in range(len(hand_keypoints)):
                            if hand_keypoints[x] is not None:
                                # drop the confidence score, keep integer (x, y)
                                hand_keypoints[x] = list(np.delete(hand_keypoints[x], 2))
                                hand_keypoints[x] = [int(y) for y in hand_keypoints[x]]
                        res_img = draw_hand_keypoints(res_img, hand_keypoints,
                                                      (bbox[0], bbox[1]))
                        left = hand_keypoints
                        cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                                      (255, 255, 255), 1)
                    else:
                        left = list(MISSING_HAND)

                    if hands["right"] is not None:
                        hand_img = hands["right"]["img"]
                        bbox = hands["right"]["bbox"]
                        hand_keypoints = hand_detector(hand_img, hand_type="right")
                        for x in range(len(hand_keypoints)):
                            if hand_keypoints[x] is not None:
                                hand_keypoints[x] = list(np.delete(hand_keypoints[x], 2))
                                hand_keypoints[x] = [int(y) for y in hand_keypoints[x]]
                        res_img = draw_hand_keypoints(res_img, hand_keypoints,
                                                      (bbox[0], bbox[1]))
                        right = hand_keypoints
                        cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                                      (255, 255, 255), 1)
                    else:
                        right = list(MISSING_HAND)

                    print("Body Pose")
                    # drop the leg joints, keeping 14 upper-body keypoints
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = person_pose.tolist()
                    for z in range(len(person_pose)):
                        if person_pose[z] is not None:
                            person_pose[z] = list(np.delete(person_pose[z], 2))
                            person_pose[z] = [int(a) for a in person_pose[z]]
                    print(person_pose)
                    print("Left")
                    print(left)
                    print("Right")
                    print(right)
                    cv2.imshow("result", res_img)

                    # reduce each part's (x, y) keypoints to one scalar per joint
                    head = person_pose
                    for x in range(len(head)):
                        if head[x] is None:
                            head[x] = [1000, 1000]
                    head = sklearnPCA(n_components=1).fit_transform(head)
                    dfhead = pd.DataFrame(data=head).T
                    dfhead = dfhead.rename(
                        columns={i: "head_{}".format(i + 1) for i in range(14)})

                    for x in range(len(left)):
                        if left[x] is None:
                            left[x] = [1000, 1000]
                    left = sklearnPCA(n_components=1).fit_transform(left)
                    dfleft = pd.DataFrame(data=left).T
                    dfleft = dfleft.rename(
                        columns={i: "left_{}".format(i + 1) for i in range(21)})

                    for x in range(len(right)):
                        if right[x] is None:
                            right[x] = [1000, 1000]
                    right = sklearnPCA(n_components=1).fit_transform(right)
                    dfright = pd.DataFrame(data=right).T
                    dfright = dfright.rename(
                        columns={i: "right_{}".format(i + 1) for i in range(21)})

                    df2 = pd.concat([dfhead, dfleft, dfright], axis=1)
                    df2["frame"] = framecounter
                    df2["gesture"] = video["gesture"]
                    df2["speaker"] = video["actor"]
                    framecounter += 1
                    df2["frame"] = df2["frame"].astype(int)
                    gesturedf = gesturedf.append(df2, sort=False)
                    firstPerson = False
            else:
                cv2.imshow("result", img)
            counter += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()
        gesturedf.to_csv("dataset720new/" + gesture + ".csv", index=False)
        print("Done Recording for: " + gesture)
        print("Took " + str(time.time() - startvideo) + " seconds")
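# A minimal sketch of the PCA step used in convertData above, shown in
# isolation: each body part is a list of (x, y) keypoints for one frame, and a
# 1-component PCA projects every keypoint onto the direction of maximum
# variance, yielding one scalar per joint (one CSV column each). The sample
# coordinates below are made up for illustration.
from sklearn.decomposition import PCA as sklearnPCA

keypoints = [[320, 180], [330, 200], None, [350, 240]]  # None = missed joint
keypoints = [[1000, 1000] if p is None else p for p in keypoints]  # sentinel fill
reduced = sklearnPCA(n_components=1).fit_transform(keypoints)  # shape (4, 1)
print(reduced.ravel())  # one value per joint, written as one row per frame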
# Configure the module search path
REPO_ROOT = ''

# Import the PoseDetector class
from pose_detector import PoseDetector

# Reference (average) limb lengths in pixels:
# zenwan = forearm, jouwan = upper arm, daitai = thigh, katai = lower leg
armave = {'Rzenwan': 63, 'Lzenwan': 64, 'Ljouwan': 67, 'Rjouwan': 66}
legave = {'Ldaitai': 96, 'Lkatai': 86, 'Rdaitai': 95, 'Rkatai': 90}

# Load the model
arch_name = 'posenet'
image_path = os.path.join(REPO_ROOT, 'data', 'person.png')
weight_path = os.path.join(REPO_ROOT, 'models', 'coco_posenet.npz')
model = PoseDetector(arch_name, weight_path)

# 1 selects the USB camera, 0 the built-in camera
cap = cv2.VideoCapture(1)

q = 0
timer = 0
w = 0
code = ""
isBasis = True
zdeg = {}
counting = 1
basisT = False

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
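# A hedged sketch (an assumption, not the original code) of how the reference
# lengths above might be used: armave/legave hold calibrated on-screen limb
# lengths in pixels, so a limb that appears shorter is foreshortened, and
# cos(theta) = observed / reference gives its out-of-plane angle. The function
# and keypoint names below are hypothetical.
import math

def limb_z_angle(p1, p2, reference_length):
    """Estimated angle (degrees) between a limb and the image plane."""
    observed = math.hypot(p1[0] - p2[0], p1[1] - p2[1])
    ratio = min(observed / reference_length, 1.0)  # clamp: noise can exceed the reference
    return math.degrees(math.acos(ratio))

# e.g. zdeg['Rzenwan'] = limb_z_angle(right_elbow, right_wrist, armave['Rzenwan'])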
chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load models
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)
    face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(args.img)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)
from pose_detector import PoseDetector, draw_person_pose

chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    while True:
        # get video frame
        ret, img = cap.read()
        if not ret:
            print("Failed to capture image")
            break
        person_pose_array, _ = pose_detector(img)
        res_img = cv2.addWeighted(img, 0.6,
                                  draw_person_pose(img, person_pose_array), 0.4, 0)
        cv2.imshow("result", res_img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    parser.add_argument("--precise",
                        type=str, default=False,
                        help="pose detector mode (precise or not)")
    parser.add_argument("--video_path", "-vp", type=str, default=None,
                        help="path of the video to build the dataset from")
    args = parser.parse_args()

    # load models
    hand_detector = HandDetector("handnet", "../models/handnet.npz",
                                 device=args.gpu)
    pose_detector = PoseDetector("posenet", "../models/coco_posenet.npz",
                                 device=args.gpu, precise=args.precise)

    # Capture the video, analyze it, and save the results
    cap = cv2.VideoCapture(args.video_path)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    if not cap.isOpened():
        sys.exit()

    train_file = open("train.csv", "a")
    train_file.write("hand_position,right_hand,left_hand\n")

    while True:
        # get video frame
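# A hedged sketch (assumption) of the per-frame row write that the loop above
# presumably performs to match the CSV header; the `hand_position` label and
# the keypoint flattening are hypothetical stand-ins, not the original code.
def write_train_row(train_file, hand_position, right_kp, left_kp):
    def flatten(kps):
        # serialize keypoints as "x:y" pairs, "nan" for missed detections
        return ";".join("nan" if p is None else "{}:{}".format(int(p[0]), int(p[1]))
                        for p in kps)
    train_file.write("{},{},{}\n".format(
        hand_position, flatten(right_kp), flatten(left_kp)))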
def main(cap, im_scale=2, view_results=False):
    debug_i = 0
    fps_timer_arr = [0] * 16
    fps = 0

    # load models
    pose_device = 0
    pose_model_dir = '../../Chainer_Realtime_Multi-Person_Pose_Estimation/models'
    pose_detector = PoseDetector("posenet", f"{pose_model_dir}/coco_posenet.npz",
                                 device=pose_device)
    hand_detector = HandDetector("handnet", f"{pose_model_dir}/handnet.npz",
                                 device=pose_device)

    # cv2.namedWindow('display', flags=(cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE))
    if view_results:
        cv2.namedWindow('display')

    video_label_file = VideoLabelFile(cap.video_fname, fname_add='pre_points_pose')
    labels_current = defaultdict(lambda: [])
    labels_all_previous = video_label_file.load_previous()

    im_input = cap.read()
    im_input_shape = im_input.shape[0:2]
    first_run = True

    while not cap.eof:
        fps_time_begin = time.perf_counter()
        debug_i += 1
        im_input = cap.read()
        current_frame_id = cap.frame_idx()
        # print(cap.info())

        im_pose = cv2.resize(im_input, (round(im_input_shape[1] / im_scale),
                                        round(im_input_shape[0] / im_scale)))
        if first_run:
            print(f"Video size {im_input.shape} -> Model input size {im_pose.shape}")
            first_run = False

        person_pose_array, _ = pose_detector(im_pose)
        im_display = cv2.addWeighted(
            im_pose, 0.6, draw_person_pose(im_pose, person_pose_array), 0.4, 0)

        for person_pose in person_pose_array:
            unit_length = pose_detector.get_unit_length(person_pose)
            # arr = np.array([a for a in person_pose if a is not None])
            # if arr.any():
            #     arr[:, 0:2] *= im_scale
            #     labels_current[current_frame_id].append(['pre_person_pose', arr.tolist()])

            # hands estimation; left and right are handled identically
            hands = pose_detector.crop_hands(im_pose, person_pose, unit_length)
            for hand_type in ("left", "right"):
                if hands[hand_type] is None:
                    continue
                hand_img = hands[hand_type]["img"]
                bbox = hands[hand_type]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type=hand_type)
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (255, 255, 255), 1)
                # keep the index-finger knuckle (5) and tip (8), mapped back to
                # full-resolution coordinates
                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array([hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points + np.array([bbox[0], bbox[1]])) * im_scale
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

        for l in labels_current[current_frame_id]:
            cv2.circle(im_display,
                       (round(l[0][0] / im_scale), round(l[0][1] / im_scale)),
                       10, (255, 0, 0), 2)
            cv2.circle(im_display,
                       (round(l[1][0] / im_scale), round(l[1][1] / im_scale)),
                       10, (0, 255, 0), 2)

        cv2.putText(im_display, f"frame {int(current_frame_id)}, fps: {int(fps)}.",
                    (10, im_display.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        if view_results:
            # cv2.imshow('display', im_display)
            cv2.imshow('display', im_pose)
        else:
            print(".", end="")
            sys.stdout.flush()

        ## KEYBOARD
        k = cv2.waitKey(5)
        if k == 27:  # esc
            break
        elif k == ord('c'):
            import ipdb
            ipdb.set_trace()

        fps_timer_arr[debug_i % 16] = time.perf_counter() - fps_time_begin
        fps = int(len(fps_timer_arr) * 1 / sum(fps_timer_arr))

    print(". ")
    # cap.release()
    video_label_file.save_current_labels(labels_current, append_previous=False,
                                         custom_lists=True)
    if view_results:
        cv2.destroyAllWindows()
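# The `cap` passed to main() above is not a raw cv2.VideoCapture: it exposes
# .read(), .eof, .frame_idx(), and .video_fname. A minimal adapter with that
# interface might look like this (a hedged sketch; the class is hypothetical):
import cv2

class VideoCaptureWrapper:
    def __init__(self, video_fname):
        self.video_fname = video_fname
        self._cap = cv2.VideoCapture(video_fname)
        self.eof = not self._cap.isOpened()
        self._frame = None

    def read(self):
        ret, frame = self._cap.read()
        if not ret:
            self.eof = True
            return self._frame  # repeat the last good frame at EOF
        self._frame = frame
        return frame

    def frame_idx(self):
        return self._cap.get(cv2.CAP_PROP_POS_FRAMES)

# main(VideoCaptureWrapper("video.mp4"), im_scale=2, view_results=True)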
chainer.config.enable_backprop = False
chainer.config.train = False

# get directory of data (rgb, depth)
print('Getting data from: {}'.format(args.data))
dm = DataManagement(args.data)
after = dt(2018, 7, 23, 14, 0, 0)
before = dt(2018, 7, 23, 15, 0, 0)
datetimes = dm.get_datetimes_in(after, before)

# camera params
o3_chain = Open3D_Chain()

# load model
pose_detector = PoseDetector("posenet",
                             os.path.join(abs_op_lib, "models/coco_posenet.npz"),
                             device=args.gpu)

for datetime in datetimes:
    save_path = dm.get_save_directory(datetime)
    save_path = os.path.join(save_path, args.save)
    if not dm.check_path_exists(save_path):
        print('Making a save directory in: {}'.format(save_path))
        os.makedirs(save_path)
    else:
        continue

    rgb_path = dm.get_rgb_path(datetime)
    depth_path = dm.get_depth_path(datetime)

    # sort rgb files before looping
class BlitzDetection:
    def __init__(self, cfg, weight, meta, W, H, camera_offset, tcp):
        self.net = darknet.load_net(cfg, weight, 0)
        self.meta = darknet.load_meta(meta)
        self.bridge = CvBridge()
        # resolution of the received image
        self.W = W
        self.H = H
        # camera's position relative to base_link
        self.CAMERA_OFFSET = camera_offset
        # tool center point
        self.TCP = tcp
        # Openpose pose detector
        self.pose_detector = PoseDetector(thresh=0.5, visualize=True)
        print('Detector loaded')

    def get_image(self, camera='/camera/color/image_raw'):
        img = rospy.wait_for_message(camera, Image)
        try:
            cv2_img = self.bridge.imgmsg_to_cv2(img, 'bgr8')
        except CvBridgeError as e:
            print(e)
            return None
        print("Image Received")
        return cv2_img

    '''
    def get_depth_image(self):
        depth_img = rospy.wait_for_message('/xtion/depth/image_raw', Image)
        try:
            depth_img = self.bridge.imgmsg_to_cv2(depth_img)  # depth in mm unit
        except CvBridgeError as e:
            print(e)
        return depth_img / 1000.0  # change to m unit
    '''

    def detection_all(self, thresh=0.7):
        cv_img = self.get_image()
        r = darknet.detect(self.net, self.meta, cv_img)
        if len(r) == 0:
            print('Could not detect anything')
            return None
        detection_list = []
        for item in r:
            name, prob, box_info = item
            if prob >= thresh:
                print('{} detected!'.format(name))
                detection_list.append(item)
        return detection_list

    def detection(self, target='teddy bear'):
        cv2_img = self.get_image()
        r = darknet.detect(self.net, self.meta, cv2_img)
        if len(r) == 0:
            print("Could not detect anything")
            return None
        for item in r:
            if target in item:
                print("Found {} in the image".format(target))
                return r
        print("No {} in this image".format(target))
        return None

    def detection_image_input(self, cv_img, target='teddy bear'):
        r = darknet.detect(self.net, self.meta, cv_img)
        if len(r) == 0:
            print("Could not detect anything")
            return None
        for item in r:
            if target in item:
                print("Found {} in the image".format(target))
                return r
        print("No {} in this image".format(target))
        return None

    def target_object(self, r, target='teddy bear'):
        for item in r:
            name, prob, box_info = item
            print(name)
        return [item for item in r if target in item][0]

    def detected_cloud(self, target, box_info):
        cloud = ros_numpy.numpify(
            rospy.wait_for_message('/camera/depth_registered/points', PointCloud2))
        target_cloud = []
        for i in range(self.H):
            for j in range(self.W):
                point = cloud[i, j]
                if math.isnan(point[0]) or math.isnan(point[1]) or math.isnan(point[2]):
                    target_cloud.append((0, 0, 0))
                    continue
                (x, y, w, h) = box_info
                # keep only points inside the detection box
                if x - w / 2 <= j <= x + w / 2 and y - h / 2 <= i <= y + h / 2:
                    target_cloud.append((point[0], point[1], point[2]))
                else:
                    target_cloud.append((0, 0, 0))
        # visualize target's point cloud
        '''
        fields = [
            PointField('x', 0, PointField.FLOAT32, 1),
            PointField('y', 4, PointField.FLOAT32, 1),
            PointField('z', 8, PointField.FLOAT32, 1),
        ]
        header = Header()
        header.frame_id = 'camera_depth_optical_frame'
        pub = rospy.Publisher('arm_move_point', PointCloud2, queue_size=2)
        pub.publish(pc2.create_cloud(header, fields, target_cloud))
        '''
        return target_cloud

    def target_position(self, cloud):
        point_list = []
        for point in cloud:
            if point == (0, 0, 0):
                continue
            # optical frame -> base_link: camera z becomes x (depth),
            # -x becomes y, -y becomes z, plus the camera offset
            x = self.CAMERA_OFFSET[0] + point[2]
            y = self.CAMERA_OFFSET[1] - point[0]
            z = self.CAMERA_OFFSET[2] - point[1]
            point_list.append((x, y, z))
        sorted_by_depth = sorted(point_list, key=lambda point: point[0])
        # keep the closer half of the points, then average
        object_list = sorted_by_depth[:len(sorted_by_depth) // 2]
        x = sum([item[0] for item in object_list]) / len(object_list)
        y = sum([item[1] for item in object_list]) / len(object_list)
        z = sum([item[2] for item in object_list]) / len(object_list)
        return (x, y, z)

    def tcp_calc(self, x, y, z):
        differ = (x - self.TCP[0], y - self.TCP[1], z - self.TCP[2])
        return differ

    def crop(self, cv_img, x, y, w, h):
        cropped = cv_img[int(y - h / 2):int(y + h / 2),
                         int(x - w / 2):int(x + w / 2)]
        return cropped

    def hand_waving_callback(self, img):
        try:
            rgb_img = self.bridge.imgmsg_to_cv2(img, 'bgr8')
        except CvBridgeError as e:
            print(e)
            return
        self.hand_wave_img = rgb_img

    def is_hand_waving(self):
        N = 10
        i = 0
        '''
        rate = rospy.Rate(5)
        rospy.Subscriber('/xtion/rgb/image_rect_color', Image,
                         self.hand_waving_callback)
        '''
        while i < N:
            '''
            while self.hand_wave_img is None:
                pass
            '''
            hand_wave_img = self.get_image(camera='/xtion/rgb/image_rect_color')
            r = self.detection_image_input(hand_wave_img, 'person')
            if r is None:
                continue
            name, prob, (x, y, w, h) = self.target_object(r, target='person')
            person_img = self.crop(hand_wave_img, x, y, w, h)
            # Feeding the bridged image straight to Openpose corrupts the
            # results, so each crop is written to disk first
            cv2.imwrite('hand_waving_frames/person_frame_{}.jpg'.format(i), person_img)
            i += 1
        is_waving = self.pose_detector.predict(N)
        if is_waving:
            print('A person is waving hand')
        else:
            print('A person is not waving hand')
        return is_waving
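# A hedged sketch (assumption) of one way BlitzDetection might be driven end to
# end: detect the target, lift it into 3-D from the registered point cloud, and
# compute the arm displacement. The config paths, topic defaults, and offsets
# below are hypothetical placeholders.
if __name__ == '__main__':
    rospy.init_node('blitz_detection')
    detector = BlitzDetection(
        cfg='cfg/yolov3.cfg', weight='yolov3.weights', meta='cfg/coco.data',
        W=640, H=480,
        camera_offset=(0.1, 0.0, 1.0),  # camera pose relative to base_link (m)
        tcp=(0.4, 0.0, 0.3))            # current tool center point (m)
    detections = detector.detection(target='teddy bear')
    if detections is not None:
        name, prob, box_info = detector.target_object(detections, target='teddy bear')
        cloud = detector.detected_cloud(name, box_info)
        x, y, z = detector.target_position(cloud)
        print('Move the arm by: {}'.format(detector.tcp_calc(x, y, z)))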
def main():
    parser = argparse.ArgumentParser(description='Train pose estimation')
    parser.add_argument('--batchsize', '-B', type=int, default=10,
                        help='Training minibatch size')
    parser.add_argument('--valbatchsize', '-b', type=int, default=4,
                        help='Validation minibatch size')
    parser.add_argument('--log-interval', type=int, default=20)
    parser.add_argument('--vis-interval', type=int, default=20)
    parser.add_argument('--val-interval', type=int, default=1000)
    parser.add_argument('--val-samples', type=int, default=100,
                        help='Number of validation samples')
    parser.add_argument('--iteration', '-i', type=int, default=300000,
                        help='Number of iterations to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--optimizer', type=str, default='adabound',
                        choices=['adabound', 'adam'], help='Optimizer')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--domain-randomization', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    result_output_path = make_fancy_output_dir(args.out, args=args)
    print("output file: {}".format(result_output_path))

    model = OpenPoseNet(
        len(fashion_landmark_utils.UpperClothJointType) + 1,
        len(fashion_landmark_utils.upper_cloth_joint_pairs) * 2)

    # copy backbone and refinement-stage weights from a COCO-pretrained model
    from chainer_openpose.datasets.coco import coco_utils
    people_model = OpenPoseNet(
        len(coco_utils.JointType) + 1,
        len(coco_utils.coco_joint_pairs) * 2)
    chainer.serializers.load_npz(
        coco_utils.get_coco_pretrained_model(), people_model)

    # refinement layers Mconv2..Mconv6 of stages 2..6, branches L1/L2, plus the
    # VGG-style feature layers (same names as the original explicit list)
    layer_names = ['Mconv{}_stage{}_L{}'.format(m, s, l)
                   for m in range(2, 7)
                   for s in range(2, 7)
                   for l in (1, 2)]
    layer_names += ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2',
                    'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4',
                    'conv4_1', 'conv4_2', 'conv4_3_CPM', 'conv4_4_CPM',
                    'conv5_1_CPM_L1', 'conv5_1_CPM_L2',
                    'conv5_2_CPM_L1', 'conv5_2_CPM_L2',
                    'conv5_3_CPM_L1', 'conv5_3_CPM_L2',
                    'conv5_4_CPM_L1', 'conv5_4_CPM_L2']
    for layer_name in layer_names:
        model[layer_name].copyparams(people_model[layer_name])
        # model[layer_name].disable_update()

    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)

    train_chain = OpenPoseTrainChain(model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        train_chain.to_gpu()

    if args.optimizer == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=1e-4, beta1=0.9,
                                            beta2=0.999, eps=1e-08)
    elif args.optimizer == 'adabound':
        optimizer = chainer.optimizers.AdaBound(alpha=1e-4, beta1=0.9,
                                                beta2=0.999, eps=1e-08)
    optimizer.setup(train_chain)

    train_datasets = FashionLandmarkKeypointsDataset(split='train')
    train = TransformDataset(train_datasets, Transform(mode='train'))
    val_datasets = FashionLandmarkKeypointsDataset(split='val')
    val = TransformDataset(val_datasets, Transform(mode='val'))

    if args.loaderjob:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               result_output_path)

    val_interval = (args.val_interval, 'iteration')
    log_interval = (args.log_interval, 'iteration')
    vis_interval = (args.vis_interval, 'iteration')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss', 'val/loss',
        'main/paf', 'val/paf',
        'main/heatmap', 'val/heatmap',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # loss plotter
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'val/loss',
         'main/paf', 'val/paf',
         'main/heatmap', 'val/heatmap'],
        file_name='loss.png', trigger=log_interval),
        trigger=log_interval)

    # Visualization.
    pose_detector = PoseDetector(
        model,
        fashion_landmark_utils.UpperClothJointType,
        fashion_landmark_utils.upper_cloth_joint_pairs,
        device=args.gpu)
    trainer.extend(
        OpenPoseVisReport(
            val_iter, pose_detector,
            fashion_landmark_utils.upper_cloth_joint_pairs),
        trigger=vis_interval)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
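# A hedged sketch (assumption) of loading a snapshot written by the trainer
# above for inference. The snapshot name follows the snapshot_object template
# 'model_iter_{...}'; the helper name and example path are hypothetical.
def load_trained_detector(snapshot_path, gpu=-1):
    model = OpenPoseNet(
        len(fashion_landmark_utils.UpperClothJointType) + 1,
        len(fashion_landmark_utils.upper_cloth_joint_pairs) * 2)
    chainer.serializers.load_npz(snapshot_path, model)
    return PoseDetector(model,
                        fashion_landmark_utils.UpperClothJointType,
                        fashion_landmark_utils.upper_cloth_joint_pairs,
                        device=gpu)

# detector = load_trained_detector('result/model_iter_300000', gpu=0)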
import chainer
from entity import params
from pose_detector import PoseDetector, draw_person_pose
from face_detector import FaceDetector, draw_face_keypoints
from hand_detector import HandDetector, draw_hand_keypoints

chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load models
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)
    face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(args.img)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)

    # each person detected
    for person_pose in person_pose_array:
        unit_length = pose_detector.get_unit_length(person_pose)
def estimate_pose(img_path, gpu=-1):
    # parser = argparse.ArgumentParser(description='Pose detector')
    # parser.add_argument('--img', help='image file path')
    # parser.add_argument('--gpu', '-g', type=int, default=-1,
    #                     help='GPU ID (negative value indicates CPU)')
    # args = parser.parse_args()

    # load models
    print("Loading pose detection model...")
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=gpu)
    print("Loading hand detection model...")
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=gpu)
    # face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(img_path)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)

    # ensures the loop below does real work at most once, so only the first
    # detected person is processed
    has_detected = False

    # each person detected
    for person_pose in person_pose_array:
        if has_detected:
            continue
        has_detected = True
        print("Body:", person_pose)
        unit_length = pose_detector.get_unit_length(person_pose)

        # face estimation
        # print("Estimating face keypoints...")
        # cropped_face_img, bbox = pose_detector.crop_face(img, person_pose, unit_length)
        # if cropped_face_img is not None:
        #     face_keypoints = face_detector(cropped_face_img)
        #     res_img = draw_face_keypoints(res_img, face_keypoints, (bbox[0], bbox[1]))
        #     cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
        #                   (255, 255, 255), 1)

        # hands estimation
        print("Estimating hands keypoints...")
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        if hands["left"] is not None:
            hand_img = hands["left"]["img"]
            bbox = hands["left"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="left")
            print("Left hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 1)
        if hands["right"] is not None:
            hand_img = hands["right"]["img"]
            bbox = hands["right"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="right")
            print("Right hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 1)

    print('Saving result into result.png...')
    cv2.imwrite('result.png', res_img)
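# A minimal usage sketch: calling estimate_pose directly. Assumes the models/
# directory with the pretrained .npz weights is present; the image path is a
# hypothetical placeholder.
if __name__ == '__main__':
    estimate_pose("data/person.png", gpu=-1)  # writes result.png next to the script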