def doHandPoseEstimate(image_cv, sess, args):
    """Run the hand-pose network on one frame and refresh the live 2x2 plot.

    Args:
        image_cv: Image array whose last axis is reversed before use
            (presumably a BGR frame from OpenCV -- confirm with the caller).
        sess: Session object whose ``run`` method evaluates the tensors.
        args: Mapping that provides the graph tensors. The keys read here are
            'image_tf', 'hand_scoremap_tf', 'image_crop_tf', 'scale_tf',
            'center_tf', 'keypoints_scoremap_tf' and 'keypoint_coord3d_tf'.
            Other keys (e.g. 'hand_side_tf', 'evaluation', 'net') are ignored.

    Returns:
        None. The result is drawn into matplotlib figure 1.
    """
    image_tf = args['image_tf']
    fetches = [
        args['hand_scoremap_tf'],
        args['image_crop_tf'],
        args['scale_tf'],
        args['center_tf'],
        args['keypoints_scoremap_tf'],
        args['keypoint_coord3d_tf'],
    ]

    # Reverse the channel axis, then resize. cv2.resize takes dsize as
    # (width, height), so the result has 240 rows and 320 columns.
    image_raw = image_cv[:, :, ::-1]
    image_raw = cv2.resize(image_raw, (320, 240))

    # Scale to [-0.5, 0.5] and add a leading batch axis of size 1.
    image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

    (hand_scoremap_v, image_crop_v, scale_v, center_v,
     keypoints_scoremap_v, keypoint_coord3d_v) = sess.run(
        fetches, feed_dict={image_tf: image_v})

    # Drop singleton axes from the network outputs.
    hand_scoremap_v = np.squeeze(hand_scoremap_v)
    image_crop_v = np.squeeze(image_crop_v)
    keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
    keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

    # post processing: undo the input normalisation on the crop and locate
    # the keypoints in the crop and in the full image.
    image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
    coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
    # 256 is passed as the last argument of trafo_coords -- presumably the
    # crop size used by the network; confirm against trafo_coords.
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

    # visualize: reuse figure 1 and clear it so successive frames do not
    # pile up on top of each other.
    fig = plt.figure(1)
    plt.ion()
    fig.clf()
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(223)
    ax4 = fig.add_subplot(224, projection='3d')

    ax1.imshow(image_raw)
    plot_hand(coord_hw, ax1)
    ax2.imshow(image_crop_v)
    plot_hand(coord_hw_crop, ax2)
    ax3.imshow(np.argmax(hand_scoremap_v, 2))
    plot_hand_3d(keypoint_coord3d_v, ax4)
    ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
    ax4.set_xlim([-3, 3])
    ax4.set_ylim([-3, 1])
    ax4.set_zlim([-3, 3])

    # Interactive mode is on, so show() does not block; pause() then runs the
    # GUI event loop briefly so the window is actually redrawn.
    plt.show()
    plt.pause(0.0001)
def get_coords_and_figure_from_name(img_name):
    """Run the model on one image file and save its keypoints and a figure.

    Two files are written, both named after the image's basename without
    extension:

    * ``<output_coords_dir>/<basename>_coords.pkl`` -- a pickled tuple
      ``(keypoint_coord3d_v, scale_v, center_v)`` exactly as returned by
      ``run_model_on_image``.
    * ``<output_figures_dir>/<basename>_figures.png`` -- a 2x2 overview plot.

    ``run_model_on_image``, ``output_coords_dir`` and ``output_figures_dir``
    are resolved from the enclosing module.

    Args:
        img_name: Path of the image file to process.

    Returns:
        None.
    """
    # NOTE(review): scipy.misc.imread was removed in SciPy 1.2. It is kept
    # here because no replacement reader is visibly imported by this block.
    image_raw = scipy.misc.imread(img_name)
    (hand_scoremap_v, image_crop_v, scale_v, center_v,
     keypoints_scoremap_v, keypoint_coord3d_v) = run_model_on_image(image_raw)

    # from here on: saving stuff
    basename = os.path.splitext(os.path.basename(img_name))[0]

    # post processing: drop singleton axes, undo the input normalisation on
    # the crop, and locate the keypoints in the crop and in the full image.
    hand_scoremap_v = np.squeeze(hand_scoremap_v)
    image_crop_v = ((np.squeeze(image_crop_v) + 0.5) * 255).astype('uint8')
    coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v))
    coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

    # save keypoint coordinates
    keypoint_save_filename = "{:s}/{:s}_coords.pkl".format(
        output_coords_dir, basename)
    with open(keypoint_save_filename, 'wb') as f:
        pickle.dump((keypoint_coord3d_v, scale_v, center_v), f, protocol=-1)
    print("Saved keypoint coordinates to {:s}".format(keypoint_save_filename))

    # save image
    image_save_filename = "{:s}/{:s}_figures.png".format(
        output_figures_dir, basename)
    fig = plt.figure(1)
    try:
        # Figure 1 may already exist with axes from earlier plotting code;
        # start from a clean canvas.
        fig.clf()
        ax1 = fig.add_subplot(221)
        ax2 = fig.add_subplot(222)
        ax3 = fig.add_subplot(223)
        ax4 = fig.add_subplot(224, projection='3d')

        ax1.imshow(image_raw)
        plot_hand(coord_hw, ax1)
        ax2.imshow(image_crop_v)
        plot_hand(coord_hw_crop, ax2)
        ax3.imshow(np.argmax(hand_scoremap_v, 2))
        # Squeeze only for plotting; this is a no-op if run_model_on_image
        # already dropped the singleton axes. The pickled array is untouched.
        plot_hand_3d(np.squeeze(keypoint_coord3d_v), ax4)
        ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
        ax4.set_xlim([-3, 3])
        ax4.set_ylim([-3, 1])
        ax4.set_zlim([-3, 3])
        fig.savefig(image_save_filename)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    print("Saved figure to {:s}".format(image_save_filename))
def visualize(scoremap, hand_side, rot_mat, coord3d_can, coord3d, coord2d):
    """Show a debug figure for every sample of a batch.

    For each index along the first axis of ``scoremap`` a three-panel figure
    is shown: the channel-wise maximum of the score map, the root-relative 3D
    keypoints (magenta) together with the canonical 3D keypoints scaled by 6
    (green), and the 2D keypoints taken as the per-channel argmax of the
    score map.

    Args:
        scoremap: Array indexed as ``[sample, row, col, keypoint]``.
        hand_side: Array indexed as ``[sample, 1]``; when that entry equals 1
            the word 'flip' is printed (no flipping is actually applied).
        rot_mat: Unused; kept for interface compatibility.
        coord3d_can: Array indexed as ``[sample, keypoint, axis]``.
        coord3d: Array indexed as ``[sample, keypoint, axis]``. It is NOT
            modified; a root-relative copy is plotted.
        coord2d: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    import matplotlib.pyplot as plt
    # Imported for its side effect: older matplotlib versions only know
    # projection='3d' once this module has been imported.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    from utils.general import plot_hand_3d, plot_hand
    import numpy as np

    num_samples = scoremap.shape[0]
    for sample_idx in range(num_samples):
        sample_scoremap = scoremap[sample_idx, :, :, :]
        # Subtract the first keypoint into a NEW array. The in-place form
        # (`view -= view[0]`) would write through to the caller's coord3d and
        # has overlapping operands.
        sample_coord3d = coord3d[sample_idx, :, :] - coord3d[sample_idx, 0, :]
        sample_coord3d_can = coord3d_can[sample_idx, :, :] * 6.0

        if hand_side[sample_idx, 1] == 1:
            print('flip')

        # Per-channel argmax over the spatial axes, done for all keypoints at
        # once; column 0 holds the row index, column 1 the column index.
        height, width, num_keypoints = sample_scoremap.shape
        flat_peak = sample_scoremap.reshape(-1, num_keypoints).argmax(axis=0)
        peak_rows, peak_cols = np.unravel_index(flat_peak, (height, width))
        keypoint_coords = np.stack([peak_rows, peak_cols], axis=1).astype(float)

        fig = plt.figure(1)
        # Start clean in case figure 1 survived the previous iteration
        # (e.g. with a non-blocking backend).
        fig.clf()
        ax1 = fig.add_subplot(131)
        ax1.imshow(np.amax(sample_scoremap, axis=2))

        ax2 = fig.add_subplot(132, projection='3d')
        plot_hand_3d(sample_coord3d, ax2,
                     color_fixed=np.array([1.0, 0.0, 1.0]))
        plot_hand_3d(sample_coord3d_can, ax2,
                     color_fixed=np.array([0.0, 1.0, 0.0]))
        ax2.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view

        ax3 = fig.add_subplot(133)
        plot_hand(keypoint_coords, ax3, color_fixed=np.array([0.0, 1.0, 0.0]))
        # ax3 is the current axes here, so the pyplot calls below act on it.
        plt.gca().invert_yaxis()
        plt.xlabel('x')
        plt.ylabel('y')
        plt.show()
crop_scale_noise=True) data = d.get(read_image=True) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) sess.run(tf.global_variables_initializer()) tf.train.start_queue_runners(sess=sess) from utils.general import detect_keypoints_3d, plot_hand_3d, plot_hand, detect_keypoints import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D for i in range(50): image, image_crop, keypoint_uv21, img_dir, keypoint_uv21_origin, hand_side \ = sess.run([data['image'], data['image_crop'], data['keypoint_uv21'], data['img_dir'], data['keypoint_uv21_origin'], data['hand_side']]) print(img_dir[0].decode()) print(hand_side) image_crop = np.squeeze((image_crop + 0.5) * 255).astype(np.uint8) image = np.squeeze((image + 0.5) * 255).astype(np.uint8) keypoint_uv21 = np.squeeze(keypoint_uv21) keypoint_uv21_origin = np.squeeze(keypoint_uv21_origin) fig = plt.figure() ax = fig.add_subplot(121) ax.imshow(image_crop) plot_hand(keypoint_uv21[:, ::-1], ax) ax = fig.add_subplot(122) ax.imshow(image) plot_hand(keypoint_uv21_origin[:, ::-1], ax) plt.show()
hand_scoremap_v = np.squeeze(hand_scoremap_v) image_crop_v = np.squeeze(image_crop_v) keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v) # keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v) # post processing image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8') coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v)) coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256) plt.cla() # fig = plt.imshow(image_raw, aspect='equal', shape=(240, 320)) fig = plt.imshow(np.zeros([240, 320, 3]), aspect='equal', shape=(240, 320)) plot_hand(coord_hw, plt) plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) plt.pause(0.001) plt.show(False) name = datetime.datetime.now() if not os.path.exists("out"): os.mkdir("out", 0o0755) cv2.imwrite("out/imag_" + str(name) + ".jpg", image_raw[..., ::-1]) plt.savefig("out/skel_" + str(name) + ".jpg", bbox_inches='tight', pad_inches=0) # visualize
image_raw = scipy.misc.imresize(image_raw, (240, 320)) image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0) hand_scoremap_v, image_crop_v, scale_v, center_v,\ keypoints_scoremap_v, keypoint_coord3d_v = sess.run([hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, keypoints_scoremap_tf, keypoint_coord3d_tf], feed_dict={image_tf: image_v}) hand_scoremap_v = np.squeeze(hand_scoremap_v) image_crop_v = np.squeeze(image_crop_v) keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v) keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v) # post processing image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8') coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v)) coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256) # plot the full image imsh.set_data(image_raw) # plot the camera image` plot_hand(coord_hw, ax) # plot the hand stick figure # # plot the cropped image # imsh.set_data(image_crop_v) # plot the camera image` # plot_hand(coord_hw_crop, ax) # plot the hand stick figure # plt.title(title) plt.draw() plt.pause(0.001)
feed_dict={image_tf: image_v}) hand_scoremap_v = np.squeeze(hand_scoremap_v) image_crop_v = np.squeeze(image_crop_v) keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v) keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v) # post processing image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8') coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v)) coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256) # visualize fig = plt.figure(1) ax1 = fig.add_subplot(221) ax2 = fig.add_subplot(222) ax3 = fig.add_subplot(223) ax4 = fig.add_subplot(224, projection='3d') ax1.imshow(image_raw) plot_hand(coord_hw, ax1) ax2.imshow(image_crop_v) plot_hand(coord_hw_crop, ax2) ax3.imshow(np.argmax(hand_scoremap_v, 2)) plot_hand_3d(keypoint_coord3d_v, ax4) ax4.view_init(azim=-90.0, elev=-90.0) # aligns the 3d coord with the camera view ax4.set_xlim([-3, 3]) ax4.set_ylim([-3, 1]) ax4.set_zlim([-3, 3]) plt.show()
coord2d_v = detect_keypoints(scoremap_v) * 8 # center gt keypoint_xyz21 -= keypoint_xyz21[0, :] if (i % 100) == 0: print('%d / %d images done: %.3f percent' % (i, dataset.num_samples, i * 100.0 / dataset.num_samples)) if args.visualize: fig = plt.figure(1) ax1 = fig.add_subplot(121, projection='3d') plot_hand_3d(keypoint_xyz21, ax1, color_fixed=np.array([1.0, 0.0, 0.0])) ax1.view_init( azim=-90.0, elev=-90.0) # aligns the 3d coord with the camera view plt.xlabel('x') plt.ylabel('y') ax1.set_xlim(-0.1, 0.1) ax1.set_ylim(-0.1, 0.1) ax1.set_zlim(-0.1, 0.1) ax2 = fig.add_subplot(122) plt.imshow(image_crop_v) plot_hand(coord2d_v, ax2) plt.show() # pdb.set_trace()
scoremap_3d, keypoint_xyz21_normed, image_crop, keypoint_uv21, img_dir, scoremap, hand_side \ = sess.run([data['scoremap_3d'], data['keypoint_xyz21_normed'], data['image_crop'], data['keypoint_uv21'], data['img_dir'], data['scoremap'], data['hand_side']]) print(img_dir[0].decode()) print(hand_side) scoremap_3d = np.squeeze(scoremap_3d) keypoint_xyz21_normed = np.squeeze(keypoint_xyz21_normed) image_crop = np.squeeze((image_crop + 0.5) * 255).astype(np.uint8) keypoint_uv21 = np.squeeze(keypoint_uv21) scoremap = np.squeeze(scoremap) keypoints = detect_keypoints_3d(scoremap_3d) keypoints2d = detect_keypoints(scoremap) fig = plt.figure() ax = fig.add_subplot(221, projection='3d') plot_hand_3d(keypoints, ax) ax.invert_yaxis() ax.invert_zaxis() ax = fig.add_subplot(222, projection='3d') plot_hand_3d(keypoint_xyz21_normed, ax) ax.invert_yaxis() ax.invert_zaxis() ax = fig.add_subplot(223) ax.imshow(image_crop) plot_hand(keypoint_uv21[:, ::-1], ax) ax = fig.add_subplot(224) ax.imshow(image_crop) plot_hand(keypoints2d, ax) plt.show()
plt.ylabel('y') ax2 = fig.add_subplot(122) plt.imshow(image_scaled_v) plt.show() # pdb.set_trace() if args.save: fig = plt.figure(figsize=(12, 6)) keypoints2d = detect_keypoints(keypoints_scoremap_v) coord_hw = trafo_coords(keypoints2d, center_v, scale_v, 256) coord_uv21 = keypoint_uv21_v[:, ::-1] / 2 ax1 = fig.add_subplot(121) plt.imshow(image_scaled_v) plot_hand(coord_hw, ax1, color_fixed=np.array((0., 0., 1.0))) plot_hand(coord_uv21, ax1, color_fixed=np.array((1., 0., 0.0))) ax2 = fig.add_subplot(122, projection='3d') plot_hand_3d(coord3d_pred_v, ax2, color_fixed=np.array([0.0, 0.0, 1.0])) plot_hand_3d(keypoint_xyz21, ax2, color_fixed=np.array([1.0, 0.0, 0.0])) ax2.set_xlabel('x') ax2.set_ylabel('y') ax2.set_xlim(-0.1, 0.1) ax2.set_ylim(-0.1, 0.1) ax2.set_zlim(-0.1, 0.1) ax2.view_init(azim=-90.0,
# 2D pipeline (Handseg + Posenet) keypoints_scoremap_v, image_crop_v, scale_v, center_v, raw_scoremap_v = sess.run( [ keypoints_scoremap_tf, image_crop_tf, scale_crop_tf, center_tf, raw_scoremap_tf ], feed_dict={image_tf: image_v}) if (raw_scoremap_v.max() < 0.99999): continue #hand_scoremap_v = np.squeeze(hand_scoremap_v) image_crop_v = np.squeeze(image_crop_v) keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v) # post processing image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8') coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v)) #--- Uncomment these lines coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256) plot_hand(coord_hw, image_raw) #image_raw = cv2.cvtColor(image_raw, cv2.COLOR_RGB2BGR) plt.imshow(image_raw) #time_string = strftime("%d-%b-%Y-%H-%M-%S", gmtime()) #plt.savefig('results/detected_image' + time_string + '.png') plt.show()
ax1 = fig.add_subplot(121, projection='3d') plot_hand_3d(coord3d_pred_v, ax1, color_fixed=np.array([0.0, 0.0, 1.0])) ax1.view_init( azim=-90.0, elev=-90.0) # aligns the 3d coord with the camera view plt.xlabel('x') plt.ylabel('y') ax1.set_xlim(-0.1, 0.1) ax1.set_ylim(-0.1, 0.1) ax1.set_zlim(-0.1, 0.1) ax2 = fig.add_subplot(122) plt.imshow(image_crop_v) plot_hand(coord2d_v, ax2, color_fixed=np.array([0.0, 0.0, 1.0])) plot_hand(keypoint_uv21[:, ::-1], ax2, color_fixed=np.array([1.0, 0.0, 0.0])) plt.show() # pdb.set_trace() if i >= 0: if args.save: coord3d_pred_v_fliped = np.copy(coord3d_pred_v) coord3d_pred_v_fliped[:, 0] = -coord3d_pred_v_fliped[:, 0] fig = plt.figure() ax1 = fig.add_subplot(111, projection='3d') plot_hand_3d(coord3d_pred_v_fliped, ax1,
use_wrist_coord=False, coord_uv_noise=True, crop_center_noise=True, crop_offset_noise=True, crop_scale_noise=True, mpii=True, nzsl=True) data = d.get(read_image=True) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) sess.run(tf.global_variables_initializer()) tf.train.start_queue_runners(sess=sess) from utils.general import detect_keypoints, plot_hand import matplotlib.pyplot as plt # from mpl_toolkits.mplot3d import Axes3D for i in range(50): scoremap, keypoint_uv21, image_crop = sess.run( [data['scoremap'], data['keypoint_uv21'], data['image_crop']]) scoremap = np.squeeze(scoremap) image_crop = np.squeeze((image_crop + 0.5) * 255).astype(np.uint8) keypoint_uv21 = np.squeeze(keypoint_uv21) keypoints = detect_keypoints(scoremap) fig = plt.figure() ax = fig.add_subplot(111) plot_hand(keypoints, ax) plt.imshow(image_crop) plt.show()
coord_uv_pred_crop = np.stack([coord_hw_pred_crop[:, 1], coord_hw_pred_crop[:, 0]], 1) image_crop = np.squeeze((image_crop+0.5)*255).astype(np.uint8) if not dataset.use_wrist_coord: kp_uv21_gt[0, :] = 2 * kp_uv21_gt[0, :] - kp_uv21_gt[12, :] coord_uv_pred_crop[0, :] = 2 * coord_uv_pred_crop[0, :] - coord_uv_pred_crop[12, :] coord_hw_pred_crop[0, :] = 2 * coord_hw_pred_crop[0, :] - coord_hw_pred_crop[12, :] util.feed(kp_uv21_gt/184, kp_vis, coord_uv_pred_crop/184) if (i % 100) == 0: print('%d / %d images done: %.3f percent' % (i, dataset.num_samples, i*100.0/dataset.num_samples)) if args.visualize: plt.imshow(image_crop) plot_hand(coord_hw_pred_crop, plt, color_fixed=np.array([0.0, 0.0, 1.0])) plot_hand(kp_uv21_gt[:, ::-1], plt, color_fixed=np.array([1.0, 0.0, 0.0])) plt.show() if args.save: result = {} result['img_dir'] = img_dir[0].decode() for i in (1, 5, 9, 13, 17): coord_uv_pred_crop[i:i+4] = coord_uv_pred_crop[i+3:i-1:-1] # reverse the order of fingers (from palm to tip) if int(hand_side[1]): coord_uv_pred_crop[:, 0] = net.crop_size - 1 - coord_uv_pred_crop[:, 0] coord_uv_pred_crop -= net.crop_size//2 result['hand2d'] = (coord_uv_pred_crop/crop_scale + crop_center[::-1]).tolist() results.append(result) mean, median, auc, pck_curve_all, threshs = util.get_measures(0.0, 1.0, 100)
def get_pic(image_list):
    """Render a hand-pose overview figure for every image and return them.

    The network is built, a session is opened and initialised through
    ``net.init``, and each image is resized with ``make_it_small.small``,
    has its channels 0 and 2 swapped, and is fed through the network. A 2x2
    figure titled with the result of ``description_of_hand_position`` is saved
    to ``imgs/<index>.png`` and read back with ``imageio``.

    Args:
        image_list: Iterable of images accepted by ``make_it_small.small``
            (presumably BGR arrays, since channels 0 and 2 are swapped before
            use -- confirm with the caller).

    Returns:
        list: The rendered figures as loaded by ``imageio.imread``, in input
        order.
    """
    import os

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 320, 240, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    (hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
     keypoints_scoremap_tf, keypoint_coord3d_tf) = net.inference(
        image_tf, hand_side_tf, evaluation)
    fetches = [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
               keypoints_scoremap_tf, keypoint_coord3d_tf]

    # savefig does not create missing directories.
    out_dir = "imgs"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    final = list()
    # The context manager closes the session (and frees its resources) even
    # if an image fails part-way through.
    with tf.Session() as sess:
        # initialize network
        net.init(sess)

        # Wrapping the list (not the enumerate object) lets tqdm see the
        # total length when the input has one.
        for i, image in enumerate(tqdm(image_list)):
            # Work on a copy so the helper's return value (which may alias
            # the caller's image) is never modified in place.
            image_raw = np.array(make_it_small.small(image))
            # Swap channels 0 and 2 for all pixels at once. The right-hand
            # side uses advanced indexing and is therefore a copy.
            image_raw[..., [0, 2]] = image_raw[..., [2, 0]]

            # Scale to [-0.5, 0.5] and add a leading batch axis of size 1.
            image_v = np.expand_dims(
                (image_raw.astype('float') / 255.0) - 0.5, 0)

            (hand_scoremap_v, image_crop_v, scale_v, center_v,
             keypoints_scoremap_v, keypoint_coord3d_v) = sess.run(
                fetches, feed_dict={image_tf: image_v})

            # The description helper receives the raw (unsqueezed) output,
            # as in the original code.
            img_angle = description_of_hand_position(keypoint_coord3d_v)

            hand_scoremap_v = np.squeeze(hand_scoremap_v)
            image_crop_v = np.squeeze(image_crop_v)
            keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
            keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

            # post processing
            image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
            coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
            coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

            # visualize
            fig = plt.figure(1)
            ax1 = fig.add_subplot(221)
            ax2 = fig.add_subplot(222)
            ax3 = fig.add_subplot(223)
            ax4 = fig.add_subplot(224, projection='3d')
            ax1.imshow(image_raw)
            plot_hand(coord_hw, ax1)
            ax2.imshow(image_crop_v)
            plot_hand(coord_hw_crop, ax2)
            ax3.imshow(np.argmax(hand_scoremap_v, 2))
            plot_hand_3d(keypoint_coord3d_v, ax4)
            ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
            ax4.set_xlim([-3, 3])
            ax4.set_ylim([-3, 1])
            ax4.set_zlim([-3, 3])
            fig.suptitle(img_angle, fontsize=10)

            # Save the figure, release it, then load the rendered PNG.
            figure_path = "imgs/{}.png".format(str(i))
            plt.savefig(figure_path)
            plt.close(fig)
            final.append(imageio.imread(figure_path))

    return final
# plot_hand_3d(coord3d_pred_v, ax1) # plot_hand_3d(keypoint_xyz21, ax1) ax1.view_init(azim=-90.0, elev=-90.0) # aligns the 3d coord with the camera view plt.xlabel('x') plt.ylabel('y') ax2 = fig.add_subplot(122) scoremap_v = np.squeeze(scoremap_v) s = scoremap_v.shape keypoint_coords = np.zeros((s[2], 2)) for i in range(s[2]): v, u = np.unravel_index(np.argmax(scoremap_v[:, :, i]), (s[0], s[1])) keypoint_coords[i, 0] = v keypoint_coords[i, 1] = u plot_hand(keypoint_coords, ax2, color_fixed=np.array([1.0, 0.0, 0.0])) plt.gca().invert_yaxis() plt.xlabel('x') plt.ylabel('y') plt.show() # pdb.set_trace() # Output results mean, median, auc, _, _ = util.get_measures(0.0, 0.050, 20) print('Evaluation results for %s:' % VARIANT) print('Average mean EPE: %.3f mm' % (mean * 1000)) print('Average median EPE: %.3f mm' % (median * 1000)) print('Area under curve: %.3f' % auc)