def __init__(self, use_gpu=False):
    """Load the 2D/3D datasets and build the model in a persistent session.

    Args:
        use_gpu: when truthy, one GPU is made visible to TensorFlow;
            otherwise the session is created with zero GPUs.
    """
    actions = data_utils.define_actions(FLAGS.action)
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Unpack the loader results straight onto the instance.
    (self.train_set_2d,
     self.test_set_2d,
     self.data_mean_2d,
     self.data_std_2d,
     self.dim_to_ignore_2d,
     self.dim_to_use_2d) = data_utils.read_2d_predictions(
         actions, FLAGS.data_dir)

    # The two root-position dictionaries are returned by the loader but are
    # not kept on the instance.
    (self.train_set_3d,
     self.test_set_3d,
     self.data_mean_3d,
     self.data_std_3d,
     self.dim_to_ignore_3d,
     self.dim_to_use_3d,
     _train_roots,
     _test_roots) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    session_config = tf.ConfigProto(
        device_count={"GPU": 1 if use_gpu else 0},
        allow_soft_placement=True)
    self.persistent_sess = tf.Session(config=session_config)

    with self.persistent_sess.as_default():
        self.graph = tf.get_default_graph()
        # NOTE(review): `batch_size` is not defined in this method; it is
        # presumably a module-level name - confirm.
        self.model = create_model(self.persistent_sess, actions, batch_size)
def read_2d_predictions(actions, data_dir):
    """Load Stacked Hourglass 2D detections and normalize them.

    Subjects 1, 5, 6, 7, 8 form the training split and subjects 9, 11 the
    test split. Normalization statistics are computed on the training split
    only and applied to both splits.

    Args:
        actions: list of action names to load.
        data_dir: directory the detections are read from.

    Returns:
        Tuple ``(train_set, test_set, data_mean, data_std, dim_to_ignore,
        dim_to_use)``.
    """
    # NOTE(review): the cameras are loaded but never used in this function;
    # the call is kept so that a missing 'cameras.h5' still fails here as it
    # did before - confirm whether it can be dropped.
    rcams, vcams = cameras.load_cameras(
        'cameras.h5', [1, 5, 6, 7, 8, 9, 11], n_interpolations=0)

    train_set = load_stacked_hourglass(data_dir, [1, 5, 6, 7, 8], actions)
    test_set = load_stacked_hourglass(data_dir, [9, 11], actions)

    # np.vstack needs a real sequence: a dict view is rejected by current
    # NumPy releases. vstack already returns a freshly allocated array, so
    # no extra deep copy is required.
    complete_train = np.vstack(list(train_set.values()))
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats(
        complete_train, dim=2)

    train_set = normalize_data(
        train_set, data_mean, data_std, dim_to_use, actions, 2)
    test_set = normalize_data(
        test_set, data_mean, data_std, dim_to_use, actions, 2)

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
def normalize_batch(frames):
    """Standardize a batch of 2D frames using the batch's own statistics.

    The datasets are loaded only to obtain the 2D dimensions in use and the
    3D statistics that are returned alongside the normalized input.

    Args:
        frames: 2D array indexed as ``frames[:, dims]``.

    Returns:
        Tuple ``(enc_in, data_mean_3d, data_std_3d, dim_to_ignore_3d)``.
    """
    actions = data_utils.define_actions(FLAGS.action)
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]

    # Get training data stats
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    (_train_2d, _test_2d, _mean_2d, _std_2d,
     _ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
         actions, FLAGS.data_dir)
    (_train_3d, _test_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, _use_3d,
     _train_roots, _test_roots) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # Per-dimension statistics of this batch, restricted to the used dims.
    batch_mean = np.mean(frames, axis=0)[dim_to_use_2d]
    batch_std = np.std(frames, axis=0)[dim_to_use_2d]

    # Normalize input; broadcasting applies the per-dimension statistics to
    # every row.
    centered = frames[:, dim_to_use_2d] - batch_mean
    enc_in = np.divide(centered, batch_std)

    return enc_in, data_mean_3d, data_std_3d, dim_to_ignore_3d
def evaluate(current_step=0):
    """Evaluate on all the test set.

    For every action, each test sequence is evaluated separately and the
    per-sequence errors are averaged; the per-action averages are printed
    and then averaged again over all actions.

    Args:
        current_step: step value forwarded to ``evaluate_batches``.

    Returns:
        Mean of the per-action errors.

    Raises:
        ValueError: if ``FLAGS.load`` is not a positive iteration number.
    """
    if FLAGS.load <= 0:
        # Raise a real exception instance; raising a (type, message) tuple
        # is a TypeError on Python 3 and drops the message.
        raise ValueError("Must give an iteration to read parameters from")

    actions = define_actions(FLAGS.action)
    rcams, vcams = cameras.load_cameras('cameras.h5', [1, 5, 6, 7, 8, 9, 11])

    # Load and normalize all the data
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions, test_root_positions, offsets_train,
     offsets_test) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, vcams)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         _, _) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         _, _) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams, vcams)
    print("done reading and normalizing data.")

    # Limit TF to take a fraction of the GPU memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    isTraining = False

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          device_count=device_count)) as sess:
        # === Create the model ===
        model = create_model(sess, isTraining, dim_to_use_3d,
                             FLAGS.batch_size, data_mean_3d, data_std_3d,
                             dim_to_ignore_3d)
        print("Model created")

        cum_err = 0
        print("{0:=^12} {1:=^6}".format("Action", "mm"))
        for action in actions:
            tot_act_err = 0
            print("{0:<12} ".format(action), end="")

            action_test_set_2d = get_action_subset(test_set_2d, action)
            action_test_set_3d = get_action_subset(test_set_3d, action)

            for key2d in action_test_set_2d.keys():
                (subj, b, fname) = key2d

                # keys should be the same if 3d is in camera coordinates
                key3d = key2d if FLAGS.camera_frame else (
                    subj, b, '{0}.h5'.format(fname.split('.')[0]))
                # In the camera frame, a '-sh' suffix on the 2D file name is
                # removed to obtain the 3D key.
                if fname.endswith('-sh') and FLAGS.camera_frame:
                    key3d = (subj, b, fname[:-3])

                # Evaluate this single sequence on its own.
                enc_in = {key2d: test_set_2d[key2d]}
                dec_out = {key3d: test_set_3d[key3d]}

                encoder_inputs, decoder_outputs = model.get_all_batches(
                    enc_in, dec_out, FLAGS.camera_frame, training=False)

                act_err, _, step_time, loss = evaluate_batches(
                    sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d, current_step, encoder_inputs,
                    decoder_outputs)

                tot_act_err = tot_act_err + act_err

            n_sequences = len(action_test_set_2d.keys())
            print("{0:>6.2f}".format(tot_act_err / n_sequences))
            cum_err = cum_err + tot_act_err / n_sequences

        print("{0:<12} {1:>6.2f}".format("Average",
                                         cum_err / float(len(actions))))
        print("{0:=^19}".format(''))

    return cum_err / float(len(actions))
camera['fx'] = f[0] camera['fy'] = f[1] camera['cx'] = c[0] camera['cy'] = c[1] camera['k'] = k camera['p'] = p camera['name'] = name return camera if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('dataset_root_dir') args = parser.parse_args() cams = cameras.load_cameras( bpath=os.path.join(args.dataset_root_dir, 'cameras.h5')) train_dirs, val_dirs = find_train_val_dirs(args.dataset_root_dir) train_val_datasets = [train_dirs, val_dirs] dbs = [] video_count = 0 for dataset in train_val_datasets: db = [] for video in dataset: if np.mod(video_count, 1) == 0: print('Process {}: {}'.format(video_count, video)) data = load_db(args.dataset_root_dir, video, video_count, cams) db.extend(data) video_count += 1 dbs.append(db)
def sample():
    """Get samples from a model and visualize them.

    Runs the model over every test sequence, un-normalizes inputs, ground
    truth and predictions, optionally converts camera-frame poses back to
    root-centered world coordinates, and finally plots 15 random
    (2D input, 3D ground truth, 3D prediction) triplets. The plotted samples
    come from the last sequence iterated, because each iteration overwrites
    the arrays of the previous one.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units."
              % (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            if fname.endswith('-sh') and FLAGS.camera_frame:
                key3d = (subj, b, fname[:-3])

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # Split into about-same-size batches. A sequence shorter than
            # batch_size would otherwise request 0 sections, which
            # np.array_split rejects with a ValueError.
            n_batches = max(1, n2d // batch_size)
            enc_in = np.array_split(enc_in, n_batches)
            dec_out = np.array_split(dec_out, n_batches)
            all_poses_3d = []

            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                _, _, poses3d = model.step(
                    sess, enc_in[bidx], dec_out[bidx], dp, isTraining=False)

                # denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d,
                    dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d,
                    dim_to_ignore_3d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, dec_out, poses3d = map(
                np.vstack, [enc_in, dec_out, all_poses_3d])

            # Convert back to world coordinates
            if FLAGS.camera_frame:
                N_CAMERAS = 4
                N_JOINTS_H36M = 32

                # Add global position back
                dec_out = dec_out + np.tile(
                    test_root_positions[key3d], [1, N_JOINTS_H36M])

                # Load the appropriate camera
                subj, _, sname = key3d
                cname = sname.split('.')[1]  # <-- camera name
                # cams of this subject
                scams = {(subj, c + 1): rcams[(subj, c + 1)]
                         for c in range(N_CAMERAS)}
                # index of camera used (the last camera entry is its name)
                scam_idx = [scams[(subj, c + 1)][-1]
                            for c in range(N_CAMERAS)].index(cname)
                the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                R, T, f, c, k, p, name = the_cam
                assert name == cname

                def cam2world_centered(data_3d_camframe):
                    data_3d_worldframe = cameras.camera_to_world_frame(
                        data_3d_camframe.reshape((-1, 3)), R, T)
                    data_3d_worldframe = data_3d_worldframe.reshape(
                        (-1, N_JOINTS_H36M * 3))
                    # subtract root translation
                    return data_3d_worldframe - np.tile(
                        data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                # Apply inverse rotation and translation
                dec_out = cam2world_centered(dec_out)
                poses3d = cam2world_centered(poses3d)

    # Grab a random batch to visualize
    enc_in, dec_out, poses3d = map(np.vstack, [enc_in, dec_out, poses3d])
    idx = np.random.permutation(enc_in.shape[0])
    enc_in, dec_out, poses3d = enc_in[idx, :], dec_out[idx, :], poses3d[idx, :]

    # Visualize random samples
    import matplotlib.gridspec as gridspec

    # 1080p = 1,920 x 1,080
    fig = plt.figure(figsize=(19.2, 10.8))

    gs1 = gridspec.GridSpec(5, 9)  # 5 rows, 9 columns
    gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
    plt.axis('off')

    subplot_idx, exidx = 1, 1
    nsamples = 15
    for i in np.arange(nsamples):
        # Plot 2d pose
        ax1 = plt.subplot(gs1[subplot_idx - 1])
        p2d = enc_in[exidx, :]
        viz.show2Dpose(p2d, ax1)
        ax1.invert_yaxis()

        # Plot 3d gt
        ax2 = plt.subplot(gs1[subplot_idx], projection='3d')
        p3d = dec_out[exidx, :]
        viz.show3Dpose(p3d, ax2)

        # Plot 3d predictions
        ax3 = plt.subplot(gs1[subplot_idx + 1], projection='3d')
        p3d = poses3d[exidx, :]
        viz.show3Dpose(p3d, ax3, lcolor="#9b59b6", rcolor="#2ecc71")

        exidx = exidx + 1
        subplot_idx = subplot_idx + 3

    plt.show()
def test():
    """Evaluate on test set.

    When ``FLAGS.evaluateActionWise`` is set, the error of every action is
    evaluated and printed, followed by the average over all actions.
    Otherwise nothing is evaluated.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # === Create the model ===
        print("Creating %d bi-layers of %d units."
              % (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        # The former `for _ in xrange(1)` wrapper ran its body exactly once
        # and relied on a Python-2-only builtin; the body now runs directly.
        if FLAGS.evaluateActionWise:
            print("{0:=^12} {1:=^6}".format("Action", "mm"))

            cum_err = 0
            for action in actions:
                print("{0:<12} ".format(action), end="")

                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs, _ = \
                    data_utils.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False,
                        n_context=FLAGS.n_context, new_dim=False,
                        batch_size=FLAGS.batch_size)

                act_err, _, step_time, loss = evaluate_batches(
                    sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d, data_mean_2d, data_std_2d,
                    dim_to_use_2d, dim_to_ignore_2d, current_step,
                    encoder_inputs, decoder_outputs,
                    test_set_2d=action_test_set_2d)

                cum_err = cum_err + act_err
                print("{0:>6.2f}".format(act_err))

            print("{0:<12} {1:>6.2f}".format(
                "Average", cum_err / float(len(actions))))
            print("{0:=^19}".format(''))

        sys.stdout.flush()
def main(_):
    """Lift smoothed OpenPose 2D keypoints to 3D and write them to a file.

    Steps:
      1. Read the smoothed 2D keypoints and save a plot of their curves.
      2. If ``FLAGS.interpolation`` is set, resample every joint curve with
         a smoothing spline and save a plot of the result.
      3. For every frame, map the keypoints into the 64-value encoder
         layout, run the model, post-process the 3D pose and pass it to
         ``write_pos_data`` together with the output file.

    Args:
        _: unused.
    """
    # The output file is opened first, as before, and is now guaranteed to
    # be closed even if a later step raises.
    with open(pose_output_dir, 'w') as posf:
        smoothed = read_openpose_json()
        plt.figure(2)
        smooth_curves_plot = show_anim_curves(smoothed, plt)
        pngName = 'gif_output/smooth_plot.png'
        smooth_curves_plot.savefig(pngName)
        logger.info('writing gif_output/smooth_plot.png')

        if FLAGS.interpolation:
            logger.info("start interpolation")

            framerange = len(smoothed.keys())
            joint_rows = 36
            array = np.concatenate(list(smoothed.values()))
            array_reshaped = np.reshape(array, (framerange, joint_rows))
            print(array_reshaped[0, :])

            arm = [4, 5, 6, 7, 8, 9, 10, 11]

            multiplier = FLAGS.multiplier
            multiplier_inv = 1 / multiplier

            # Both sample grids are identical for every joint curve.
            frame_idx = range(framerange)
            frame_resampled = np.arange(0, framerange, multiplier)

            resampled_curves = []
            for row in range(joint_rows):
                x = list(array_reshaped[:, row])
                spl = UnivariateSpline(frame_idx, x, k=3)
                # relative smooth factor based on jnt anim curve
                smooth_fac = max(x) - min(x)
                # rows listed in `arm` get a much smaller smoothing weight
                smooth_resamp = 1 if row in arm else 75
                smooth_fac = smooth_fac * smooth_resamp
                spl.set_smoothing_factor(float(smooth_fac))
                resampled_curves.append(np.ravel(spl(frame_resampled)))

            # Curves are stored back to back: all samples of row 0, then all
            # samples of row 1, and so on.
            out_array = np.concatenate(resampled_curves)

            logger.info("done interpolating. reshaping {0} frames, please wait!!".format(framerange))

            # Regroup from per-curve order to per-frame order.
            n_resampled = int(framerange * multiplier_inv)
            per_frame = [
                [out_array[jnt * n_resampled + frame]
                 for jnt in range(joint_rows)]
                for frame in range(n_resampled)
            ]
            out_array = np.reshape(np.array(per_frame, dtype=float),
                                   (n_resampled, joint_rows))

            interpolate_smoothed = {
                frame: list(out_array[frame]) for frame in range(n_resampled)
            }

            plt.figure(3)
            smoothed = interpolate_smoothed
            interpolate_curves_plot = show_anim_curves(smoothed, plt)
            pngName = 'gif_output/interpolate_{0}.png'.format(smooth_resamp)
            interpolate_curves_plot.savefig(pngName)
            logger.info('writing gif_output/interpolate_plot.png')

        enc_in = np.zeros((1, 64))

        actions = data_utils.define_actions(FLAGS.action)

        SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
        rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
        (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
         dim_to_ignore_3d, dim_to_use_3d, train_root_positions,
         test_root_positions) = data_utils.read_3d_data(
             actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
             FLAGS.predict_14)

        device_count = {"GPU": 1}
        with tf.Session(config=tf.ConfigProto(
                device_count=device_count,
                allow_soft_placement=True)) as sess:
            batch_size = 128
            model = create_model(sess, actions, batch_size)
            iter_range = len(smoothed.keys())

            # Last pose that was written; used as a fallback when a frame
            # produces an implausible prediction. None until the first frame
            # has been processed.
            before_pose = None

            for frame, xy in smoothed.items():
                logger.info("calc frame {0}/{1}".format(frame, iter_range))

                # map list into np array
                _data = np.zeros(36)
                for o in range(36):
                    # feed array with xy array
                    _data[o] = xy[o]

                # mapping all body parts or 3d-pose-baseline format
                for i in range(len(order)):
                    for j in range(2):
                        # create encoder input
                        enc_in[0][order[i] * 2 + j] = _data[i * 2 + j]
                for j in range(2):
                    # Hip
                    enc_in[0][0 * 2 + j] = (
                        enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2
                    # Neck/Nose
                    enc_in[0][14 * 2 + j] = (
                        enc_in[0][15 * 2 + j] + enc_in[0][12 * 2 + j]) / 2
                    # Thorax
                    enc_in[0][13 * 2 + j] = (
                        2 * enc_in[0][12 * 2 + j] - enc_in[0][14 * 2 + j])

                # set spine
                spine_x = enc_in[0][24]
                spine_y = enc_in[0][25]

                enc_in = enc_in[:, dim_to_use_2d]
                mu = data_mean_2d[dim_to_use_2d]
                stddev = data_std_2d[dim_to_use_2d]
                enc_in = np.divide((enc_in - mu), stddev)

                dp = 1.0
                dec_out = np.zeros((1, 48))
                _, _, poses3d = model.step(
                    sess, enc_in, dec_out, dp, isTraining=False)

                enc_in = data_utils.unNormalizeData(
                    enc_in, data_mean_2d, data_std_2d, dim_to_ignore_2d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)

                gs1 = gridspec.GridSpec(1, 1)
                # set the spacing between axes.
                gs1.update(wspace=-0.00, hspace=0.05)
                plt.axis('off')

                # vstack returns fresh arrays, so the in-place edits below
                # never touch the arrays returned by unNormalizeData.
                enc_in = np.vstack(enc_in)
                poses3d = np.vstack([poses3d])

                subplot_idx = 1
                _max = 0
                _min = 10000

                # Swap the 2nd and 3rd coordinate of each of the 32 joints
                # and track the range of the new 3rd coordinate.
                for i in range(poses3d.shape[0]):
                    for j in range(32):
                        tmp = poses3d[i][j * 3 + 2]
                        poses3d[i][j * 3 + 2] = poses3d[i][j * 3 + 1]
                        poses3d[i][j * 3 + 1] = tmp
                        if poses3d[i][j * 3 + 2] > _max:
                            _max = poses3d[i][j * 3 + 2]
                        if poses3d[i][j * 3 + 2] < _min:
                            _min = poses3d[i][j * 3 + 2]

                # Mirror the 3rd coordinate within its range and offset the
                # pose by the 2D spine position.
                for i in range(poses3d.shape[0]):
                    for j in range(32):
                        poses3d[i][j * 3 + 2] = (
                            _max - poses3d[i][j * 3 + 2] + _min)
                        poses3d[i][j * 3] += (spine_x - 630)
                        poses3d[i][j * 3 + 2] += (500 - spine_y)

                # Plot 3d predictions
                ax = plt.subplot(gs1[subplot_idx - 1], projection='3d')
                ax.view_init(18, -70)

                # Reuse the previous pose when this one is far out of range;
                # on the very first frame there is nothing to fall back to.
                if np.min(poses3d) < -1000 and before_pose is not None:
                    poses3d = before_pose

                before_pose = poses3d
                write_pos_data(poses3d, ax, posf)
import cameras
from pykalman import KalmanFilter
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from torch.utils.data import Dataset

# Feel free to use more cells if necessary.

# --- Configuration ---------------------------------------------------------
SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
cameras_path = '/content/gdrive/My Drive/CSE527-HW6-Fall19/h36m/cameras.h5'
data_dir = '/content/gdrive/My Drive/CSE527-HW6-Fall19/h36m'
camera_frame = True
predict_14 = False
# Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth
# 2D projections.
use_sh = False

# --- Actions and cameras ---------------------------------------------------
actions = data_utils.define_actions("All")
rcams = cameras.load_cameras(cameras_path, SUBJECT_IDS)

# --- 3D data ---------------------------------------------------------------
(train_set_3d,
 test_set_3d,
 data_mean_3d,
 data_std_3d,
 dim_to_ignore_3d,
 dim_to_use_3d,
 train_root_positions,
 test_root_positions) = data_utils.read_3d_data(
     actions, data_dir, camera_frame, rcams, predict_14)

# --- 2D data (loaded detections or created projections) --------------------
(train_set_2d,
 test_set_2d,
 data_mean_2d,
 data_std_2d,
 dim_to_ignore_2d,
 dim_to_use_2d) = (
     data_utils.read_2d_predictions(actions, data_dir)
     if use_sh
     else data_utils.create_2d_data(actions, data_dir, rcams))
def __init__(self, config, mode):
    """Load the 3D / 2D data for one split and cut it into clips.

    Reads the 3D poses and ground-truth 2D poses from ``data/train.h5`` and
    ``data/test.h5`` and the CPN 2D detections from ``data/data_cpn.npz``,
    keeps the 17 named joints, root-centers the 3D poses and registers
    fixed-length windows in ``self.indices``.

    Normalization statistics are computed in "train" mode and stored in
    ``config["stats"]``; in "test" mode they are read back from there.

    Args:
        config: dict with the keys "cameras_path", "n_frames", "n_joints",
            "n_bases", "window_slide", "bases" and, for the test split,
            "stats".
        mode: "train" or "test".

    Raises:
        Exception: if either HDF5 dataset file is missing.
    """
    self.logger = logging.getLogger(self.__class__.__name__)

    assert mode in ["train", "test"], "Invalid mode: {}".format(mode)
    self.config = config
    self.mode = mode

    subject_ids = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(config["cameras_path"], subject_ids)
    self.rcams = rcams

    if not (os.path.isfile("data/train.h5")
            and os.path.isfile("data/test.h5")):
        raise Exception("Dataset file is missing!")

    def _read_sequences(h5_file, group):
        # Map (subject, action, filename) -> array for every dataset in group.
        sequences = {}
        for name in h5_file[group]:
            d = h5_file[group][name]
            key = (d.attrs["subject"], d.attrs["action"],
                   d.attrs["filename"])
            sequences[key] = d[:]
        return sequences

    with h5py.File("data/train.h5", "r") as f:
        train_set_3d = _read_sequences(f, "data_3d")
        train_set_2d_gt = _read_sequences(f, "data_2d_gt")

    with h5py.File("data/test.h5", "r") as f:
        test_set_3d = _read_sequences(f, "data_3d")
        test_set_2d_gt = _read_sequences(f, "data_2d_gt")

    self.logger.info(
        "{} 3d train files, {} 3d test files are loaded.".format(
            len(train_set_3d), len(test_set_3d)))
    self.logger.info(
        "{} 2d GT train files, {} 2d GT test files are loaded.".format(
            len(train_set_2d_gt), len(test_set_2d_gt)))

    # "positions_2d" is a pickled Python object, which np.load refuses to
    # read unless allow_pickle=True (default changed in NumPy 1.16.3).
    # NOTE(review): unpickling executes code from the file - only load a
    # trusted data_cpn.npz.
    with np.load("data/data_cpn.npz", allow_pickle=True) as f_cpn:
        data_2d_cpn = f_cpn["positions_2d"].item()

    self.n_frames = config["n_frames"]
    self.n_joints = config["n_joints"]
    self.n_bases = config["n_bases"]
    self.window_slide = config["window_slide"]
    self.bases = config["bases"]

    # Indices of the joints that have a name in H36M_NAMES.
    dims_17 = np.where(np.array([x != '' for x in H36M_NAMES]))[0]
    assert self.n_joints == 17, self.n_joints
    dim_2d = np.sort(np.hstack([dims_17 * 2 + i for i in range(2)]))
    dim_3d = np.sort(np.hstack([dims_17 * 3 + i for i in range(3)]))

    self.left_right_symmetry_2d = np.array(
        [0, 4, 5, 6, 1, 2, 3, 7, 8, 9, 10, 14, 15, 16, 11, 12, 13])
    # 16 entries: the 3D poses are stored without their root joint.
    self.left_right_symmetry_3d = np.array(
        [3, 4, 5, 0, 1, 2, 6, 7, 8, 9, 13, 14, 15, 10, 11, 12])

    dim_cpn_to_gt = np.array(
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])

    self.data_2d_gt = {}
    self.data_2d_cpn = {}
    self.data_3d = {}
    self.indices = []

    if mode == "train":
        data_3d = train_set_3d
        data_2d_gt = train_set_2d_gt
    else:
        data_3d = test_set_3d
        data_2d_gt = test_set_2d_gt

    # cut videos into short clips of fixed length
    self.logger.info("Loading sequence...")
    for idx, k in enumerate(sorted(data_3d)):
        if k[0] == 11 and k[2].split(".")[0] == "Directions":
            # one video is missing
            # drop all four videos instead of only one camera's view
            self.data_3d[k] = None
            continue

        assert k in data_2d_gt, k
        assert data_3d[k].shape[0] == data_2d_gt[k].shape[0]

        cam_name = k[2].split(".")[1]
        cam_id = cameras.cam_name_to_id[cam_name]

        # CPN detections, trimmed to the length of the 3D sequence.
        d2_cpn = data_2d_cpn["S{}".format(k[0])][k[2].split(".")[0]][
            cam_id - 1][:data_3d[k].shape[0], dim_cpn_to_gt]
        d2_cpn = d2_cpn.reshape([d2_cpn.shape[0], self.n_joints * 2])
        self.data_2d_cpn[k] = d2_cpn

        d2_gt = data_2d_gt[k][:, dim_2d]
        d2_gt = d2_gt.reshape([d2_gt.shape[0], self.n_joints, 2])
        d2_gt = d2_gt.reshape([d2_gt.shape[0], self.n_joints * 2])
        self.data_2d_gt[k] = d2_gt

        d3 = data_3d[k][:, dim_3d]
        d3 = d3.reshape([d3.shape[0], self.n_joints, 3])
        # align root to origin
        d3 = d3 - d3[:, :1, :]
        d3 = d3.reshape([d3.shape[0], self.n_joints * 3])
        # remove zero root joint
        d3 = d3[:, 3:]
        self.data_3d[k] = d3

        # Register every full window of n_frames, stepping by window_slide.
        N = data_3d[k].shape[0]
        n = 0
        while n + self.n_frames <= N:
            self.indices.append((idx, ) + k + (n, self.n_frames))
            n += self.window_slide

    self.n_data = len(self.indices)
    self.logger.info("{} data loaded for {} dataset".format(
        self.n_data, mode))

    # computing statistics for data normalization
    if "stats" in config:
        assert mode == "test", mode
        stats_data = config["stats"]
        self.logger.info("Loading stats...")
        self.mean_2d, self.std_2d, self.mean_3d, self.std_3d = stats_data
    else:
        assert mode == "train", mode
        # np.vstack needs a real sequence (a dict view is rejected by
        # current NumPy); dropped sequences are stored as None and skipped.
        all_2d = np.vstack(list(self.data_2d_gt.values()))
        all_3d = np.vstack(
            [d for d in self.data_3d.values() if d is not None])

        self.mean_2d = np.mean(all_2d, axis=0)  # (2J,)
        self.std_2d = np.std(all_2d, axis=0)  # (2J,)
        self.mean_3d = np.mean(all_3d, axis=0)  # (3J,)
        self.std_3d = np.std(all_3d, axis=0)  # (3J,)

        self.logger.info("mean 2d: {}".format(self.mean_2d))
        self.logger.info("std 2d: {}".format(self.std_2d))
        self.logger.info("mean 3d: {}".format(self.mean_3d))
        self.logger.info("std 3d: {}".format(self.std_3d))

        stats_data = self.mean_2d, self.std_2d, self.mean_3d, self.std_3d
        config["stats"] = stats_data
        self.logger.info("Saving stats...")
def main(_):
    """Lift the newest OpenPose keypoint file to 3D, in an endless loop.

    On every pass the most recently created file in ``openpose_output_dir``
    is read, the first person's 2D keypoints are mapped to the 64-value
    encoder layout and fed to the model, the predicted 3D pose is
    post-processed, appended as one row to ``Realtimedata.xlsx``, rendered
    to ``<frame>_keypoints.png`` and shown in an OpenCV window. Pressing
    'q' in that window ends the loop. Errors while processing a file are
    printed and the loop continues.

    Args:
        _: unused.
    """
    done = []

    enc_in = np.zeros((1, 64))

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
     dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
         actions, FLAGS.data_dir)
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    device_count = {"GPU": 0}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        batch_size = 128
        model = create_model(sess, actions, batch_size)

        rows = 0
        filename = "Realtimedata.xlsx"
        workbook = xlsxwriter.Workbook(filename)
        worksheet = workbook.add_worksheet()

        try:
            while True:
                key = cv2.waitKey(1) & 0xFF

                # check for other file types
                list_of_files = glob.iglob(
                    "{0}/*".format(openpose_output_dir))
                latest_file = ""
                try:
                    latest_file = max(list_of_files, key=os.path.getctime)
                except ValueError:
                    # empty dir
                    pass
                if not latest_file:
                    continue

                try:
                    _file = file_name = latest_file
                    print(latest_file)
                    if not os.path.isfile(_file):
                        raise Exception("No file found!!, {0}".format(_file))
                    # Close the handle right after parsing; this loop runs
                    # indefinitely and would otherwise leak descriptors.
                    with open(_file) as json_file:
                        data = json.load(json_file)

                    # take first person
                    _data = data["people"][0]["pose_keypoints_2d"]
                    xy = []
                    if len(_data) >= 53:
                        # openpose incl. confidence score
                        # ignore confidence score
                        for o in range(0, len(_data), 3):
                            xy.append(_data[o])
                            xy.append(_data[o + 1])
                    else:
                        # tf-pose-estimation
                        xy = _data

                    # The first run of digits in the path is the frame index.
                    frame_indx = re.findall(r"(\d+)", file_name)
                    frame = int(frame_indx[0])

                    joints_array = np.zeros((1, 36))
                    for o in range(len(joints_array[0])):
                        # feed array with xy array
                        joints_array[0][o] = xy[o]
                    _data = joints_array[0]

                    # mapping all body parts or 3d pose offline format
                    for i in range(len(order)):
                        for j in range(2):
                            # create encoder input
                            enc_in[0][order[i] * 2 + j] = _data[i * 2 + j]
                    for j in range(2):
                        # Hip
                        enc_in[0][0 * 2 + j] = (
                            enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2
                        # Neck/Nose
                        enc_in[0][14 * 2 + j] = (
                            enc_in[0][15 * 2 + j]
                            + enc_in[0][12 * 2 + j]) / 2
                        # Thorax
                        enc_in[0][13 * 2 + j] = (
                            2 * enc_in[0][12 * 2 + j]
                            - enc_in[0][14 * 2 + j])

                    # set spine
                    spine_x = enc_in[0][24]
                    spine_y = enc_in[0][25]

                    enc_in = enc_in[:, dim_to_use_2d]
                    mu = data_mean_2d[dim_to_use_2d]
                    stddev = data_std_2d[dim_to_use_2d]
                    enc_in = np.divide((enc_in - mu), stddev)

                    dp = 1.0
                    dec_out = np.zeros((1, 48))
                    _, _, poses3d = model.step(
                        sess, enc_in, dec_out, dp, isTraining=False)

                    enc_in = data_utils.unNormalizeData(
                        enc_in, data_mean_2d, data_std_2d, dim_to_ignore_2d)
                    poses3d = data_utils.unNormalizeData(
                        poses3d, data_mean_3d, data_std_3d,
                        dim_to_ignore_3d)

                    gs1 = gridspec.GridSpec(1, 1)
                    # set the spacing between axes.
                    gs1.update(wspace=-0.00, hspace=0.05)
                    plt.axis('off')

                    # vstack returns fresh arrays, so the in-place edits
                    # below never touch the unNormalizeData results.
                    enc_in = np.vstack(enc_in)
                    poses3d = np.vstack([poses3d])

                    subplot_idx = 1
                    _max = 0
                    _min = 10000

                    # Swap the 2nd and 3rd coordinate of each of the 32
                    # joints and track the range of the new 3rd coordinate.
                    for i in range(poses3d.shape[0]):
                        for j in range(32):
                            tmp = poses3d[i][j * 3 + 2]
                            poses3d[i][j * 3 + 2] = poses3d[i][j * 3 + 1]
                            poses3d[i][j * 3 + 1] = tmp
                            if poses3d[i][j * 3 + 2] > _max:
                                _max = poses3d[i][j * 3 + 2]
                            if poses3d[i][j * 3 + 2] < _min:
                                _min = poses3d[i][j * 3 + 2]

                    # Mirror the 3rd coordinate within its range and offset
                    # the pose by the 2D spine position.
                    for i in range(poses3d.shape[0]):
                        for j in range(32):
                            poses3d[i][j * 3 + 2] = (
                                _max - poses3d[i][j * 3 + 2] + _min)
                            poses3d[i][j * 3] += (spine_x - 630)
                            poses3d[i][j * 3 + 2] += (500 - spine_y)

                    # NOTE(review): min_vex, p_vex, gait_list1, points, x, y
                    # and z are not defined in this function, and `i` is the
                    # leftover index of the loop above; if they are not
                    # module-level names this raises NameError, which the
                    # handler below prints - confirm intent.
                    for val in min_vex:
                        gait_list1.append(val[0])
                        gait_list1.append(val[1])
                        gait_list1.append(p_vex[i][0])
                        gait_list1.append(p_vex[i][1])
                        gait_list1.append(p_vex[i][2])
                        points.append(
                            " %f %f %f %d %d %d 0\n" %
                            (p_vex[i][0], p_vex[i][1], p_vex[i][2],
                             0, 255, 0))
                        x.append(p_vex[i][0])
                        y.append(p_vex[i][1])
                        z.append(p_vex[i][2])
                        i = i + 1

                    # Plot 3d predictions
                    ax = plt.subplot(gs1[subplot_idx - 1], projection='3d')
                    ax.view_init(18, -70)
                    logger.debug(np.min(poses3d))
                    # Fall back to the previous pose when this one is far
                    # out of range (never for frame 0).
                    if np.min(poses3d) < -1000 and frame != 0:
                        poses3d = before_pose

                    p3d = poses3d
                    viz.show3Dpose(p3d, ax, lcolor="#9b59b6",
                                   rcolor="#2ecc71")

                    # One spreadsheet row per processed frame.
                    col = 0
                    for value in p3d[0]:
                        worksheet.write(rows, col, value)
                        col += 1
                    rows += 1
                    before_pose = poses3d

                    pngName = '{}_keypoints.png'.format(str(frame))
                    plt.savefig(pngName)

                    img = cv2.imread(pngName, 0)
                    rect_cpy = img.copy()
                    cv2.imshow('3d-pose-realtime', rect_cpy)
                    done.append(file_name)
                    if key == ord('q'):
                        break

                except Exception as e:
                    print(e)
        finally:
            # XlsxWriter only writes the file when the workbook is closed;
            # without this the collected rows were never saved.
            workbook.close()
def test():
    """Evaluate a model on the test split and print the errors in mm.

    Loads the 3d data and the matching 2d inputs (stacked-hourglass
    detections when ``FLAGS.use_sh`` is set, otherwise the output of
    ``data_utils.create_2d_data``), builds the model with ``create_model``
    and then prints either one error per action
    (``FLAGS.evaluateActionWise``) or the overall and per-joint errors.
    """
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    actions = data_utils.define_actions(FLAGS.action)
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data (with the augmentation options taken from FLAGS).
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions, test_root_positions, augmented3d,
     train_set_3d_for_noisy, data_mean_3d_test,
     data_std_3d_test) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14, FLAGS.augment_all, FLAGS.augment_rot,
         FLAGS.augment_flip, FLAGS.augment_trans, FLAGS.add_noise,
         FLAGS.add_kinematics)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections.
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
        # Only one set of 2d statistics is returned on this path, so reuse
        # it for the test-time un-normalisation; previously these two names
        # were unbound here and the evaluation raised NameError.
        data_mean_2d_test, data_std_2d_test = data_mean_2d, data_std_2d
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d, train_set_2d_for_noisy,
         data_mean_2d_test, data_std_2d_test) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams, augmented3d)
    print("done reading and normalizing data.")

    # No training happens here, so every summary is logged at step 0 (the
    # same value that is handed to evaluate_batches below).
    current_step = 0

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    with tf.Session(config=tf.ConfigProto(
            device_count=device_count, gpu_options=gpu_options)) as sess:

        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = FLAGS.batch_size
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        if FLAGS.evaluateActionWise:
            # Table header: both titles centred and padded with '='.
            print("{0:=^12} {1:=^6}".format("Action", "mm"))

            cum_err = 0
            for action in actions:
                print("{0:<12} ".format(action), end="")

                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    action_test_set_2d, action_test_set_3d,
                    FLAGS.camera_frame, training=False)

                act_err, _, step_time, loss = evaluate_batches(
                    sess, model, data_mean_3d_test, data_std_3d_test,
                    dim_to_use_3d, dim_to_ignore_3d, data_mean_2d_test,
                    data_std_2d_test, dim_to_use_2d, dim_to_ignore_2d,
                    current_step, encoder_inputs, decoder_outputs)
                cum_err = cum_err + act_err

                print("{0:>6.2f}".format(act_err))

            mean_err = cum_err / float(len(actions))
            summaries = sess.run(model.err_mm_summary,
                                 {model.err_mm: float(mean_err)})
            model.test_writer.add_summary(summaries, current_step)
            print("{0:<12} {1:>6.2f}".format("Average", mean_err))
            print("{0:=^19}".format(''))
        else:
            n_joints = 17 if not (FLAGS.predict_14) else 14
            encoder_inputs, decoder_outputs = model.get_all_batches(
                test_set_2d, test_set_3d, FLAGS.camera_frame, training=False)

            total_err, joint_err, step_time, loss = evaluate_batches(
                sess, model, data_mean_3d_test, data_std_3d_test,
                dim_to_use_3d, dim_to_ignore_3d, data_mean_2d_test,
                data_std_2d_test, dim_to_use_2d, dim_to_ignore_2d,
                current_step, encoder_inputs, decoder_outputs, FLAGS.epochs)

            print("=============================\n"
                  "Step-time (ms):      %.4f\n"
                  "Val loss avg:        %.4f\n"
                  "Val error avg (mm):  %.2f\n"
                  "=============================" %
                  (1000 * step_time, loss, total_err))

            for i in range(n_joints):
                # Right-aligned, minimum width 5, 2 decimal places.
                print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                    i + 1, joint_err[i]))
            print("=============================")
def video():
    """Run the bi-frame model on selected test sequences.

    Loads the 3d data and the stacked-hourglass 2d predictions for all
    actions, converts the 2d data with
    ``data_utils.transform_to_2d_biframe_prediction``, predicts 3d poses for
    a fixed list of subject-9 / subject-11 sequences, optionally converts
    them to root-centred world coordinates (``FLAGS.camera_frame``) and
    creates the per-sequence ``3Destimate`` output directory.
    """
    actions_all = data_utils.define_actions("All")

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions_all, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    train_set_3d = data_utils.remove_first_frame(train_set_3d)
    test_set_3d = data_utils.remove_first_frame(test_set_3d)
    train_root_positions = data_utils.remove_first_frame(train_root_positions)
    test_root_positions = data_utils.remove_first_frame(test_root_positions)
    print("Finished Read 3D Data")

    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.read_2d_predictions(actions_all,
                                                     FLAGS.data_dir)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.transform_to_2d_biframe_prediction(
         train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d)
    print("Finished Read 2D Data")
    print(test_set_2d)

    # Only these (file name, subject) pairs are processed; built once so the
    # membership test inside the loop is a set lookup.
    selected_sequences = {
        ("Greeting 1.60457274.h5-sh", 9),
        ("Photo.58860488.h5-sh", 9),
        ("Directions 1.54138969.h5-sh", 9),
        ("Purchases 1.55011271.h5-sh", 9),
        ("Greeting.54138969.h5-sh", 11),
        ("Discussion 1.55011271.h5-sh", 11),
        ("Eating 1.55011271.h5-sh", 11),
        ("Purchases 1.55011271.h5-sh", 11),
    }

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = FLAGS.batch_size  # Initial code is 64*2
        model = predict_3dpose_biframe.create_model(sess, actions_all,
                                                    batch_size)
        print("Model loaded")

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d

            if (fname, subj) not in selected_sequences:
                continue
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            print("Model Input has size : ", enc_in.shape)

            # Split into about-same-size batches. np.array_split rejects 0
            # sections, which is what n2d // batch_size yields for a sequence
            # shorter than one batch, so always use at least one section.
            enc_in = np.array_split(enc_in, max(1, n2d // batch_size))
            all_poses_3d = []

            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                # Placeholder for the decoder target; only its shape matters
                # to model.step here.
                anything = np.zeros((enc_in[bidx].shape[0], 48))
                _, _, poses3d = model.step(sess, enc_in[bidx], anything, dp,
                                           isTraining=False)

                # denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d, dim_to_ignore_2d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, poses3d = map(np.vstack, [enc_in, all_poses_3d])

            # Convert back to world coordinates
            if FLAGS.camera_frame:
                N_CAMERAS = 4
                N_JOINTS_H36M = 32

                # Camera name is the second dot-separated field of the file
                # name, e.g. "55011271".
                cname = fname.split('.')[1]
                # cams of this subject
                scams = {(subj, c + 1): rcams[(subj, c + 1)]
                         for c in range(N_CAMERAS)}
                # index of camera used
                scam_idx = [
                    scams[(subj, c + 1)][-1] for c in range(N_CAMERAS)
                ].index(cname)
                the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                R, T, f, c, k, p, name = the_cam
                assert name == cname

                def cam2world_centered(data_3d_camframe):
                    """Map camera-frame poses to root-centred world frame."""
                    data_3d_worldframe = cameras.camera_to_world_frame(
                        data_3d_camframe.reshape((-1, 3)), R, T)
                    data_3d_worldframe = data_3d_worldframe.reshape(
                        (-1, N_JOINTS_H36M * 3))
                    # subtract root translation
                    return data_3d_worldframe - np.tile(
                        data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                # Apply inverse rotation and translation
                poses3d = cam2world_centered(poses3d)

            # Both inputs are already 2d arrays here, so this vstack leaves
            # their contents unchanged.
            enc_in, poses3d = map(np.vstack, [enc_in, poses3d])

            # 1080p = 1,920 x 1,080
            fig = plt.figure(figsize=(7, 7))
            gs1 = gridspec.GridSpec(1, 1)
            plt.axis('on')

            dir_3d_estimates = FLAGS.data_dir + 'S' + str(
                subj) + '/VideoBiframe/' + fname + '/3Destimate/'
            if not os.path.isdir(dir_3d_estimates):
                os.makedirs(dir_3d_estimates)
def main(_):
    """Convert smoothed OpenPose 2d joints into 3d joint positions.

    Reads the OpenPose JSON output, feeds every frame through the
    3d-pose-baseline model, combines the OpenPose (x, y) values with the
    predicted depth, shifts the result so the feet rest on Y = 0, and writes
    ``pos.txt``, ``smoothed.txt``, ``start_frame.txt``, per-frame PNGs and
    (at INFO verbosity or more verbose) an animated GIF into the output
    directory.

    The single positional argument is ignored.
    """
    # Timestamp for output naming
    now_str = "{0:%Y%m%d_%H%M%S}".format(datetime.datetime.now())

    logger.debug("FLAGS.person_idx={0}".format(FLAGS.person_idx))

    # The directory layout changed, so output goes to the same level as the
    # JSON output (2/9)
    if FLAGS.output is None:
        subdir = openpose_output_dir
    else:
        subdir = FLAGS.output

    os.makedirs(subdir, exist_ok=True)

    frame3d_dir = "{0}/frame3d".format(subdir)
    if os.path.exists(frame3d_dir):
        # If the directory already exists, delete it first
        shutil.rmtree(frame3d_dir)
    os.makedirs(frame3d_dir)

    # Joint position file
    posf = open(subdir + '/pos.txt', 'w')

    # Normalised OpenPose position file
    smoothedf = open(subdir + '/smoothed.txt', 'w')

    # Start-frame index file
    start_frame_f = open(subdir + '/start_frame.txt', 'w')

    idx = FLAGS.person_idx - 1
    start_frame_index, smoothed = openpose_utils.read_openpose_json(
        "{0}/json".format(openpose_output_dir), idx, FLAGS.verbose == 3)

    # Save the start-frame index
    start_frame_f.write(str(start_frame_index))
    start_frame_f.close()

    logger.info("reading and smoothing done. start feeding 3d-pose-baseline")
    logger.debug(smoothed)
    plt.figure(2)
    smooth_curves_plot = show_anim_curves(smoothed, plt)
    pngName = subdir + '/smooth_plot.png'
    smooth_curves_plot.savefig(pngName)

    enc_in = np.zeros((1, 64))
    enc_in[0] = [0 for i in range(64)]

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
        actions, FLAGS.data_dir)
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # before_pose = None
    device_count = {"GPU": 1}
    png_lib = []
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        #plt.figure(3)
        batch_size = 128
        model = create_model(sess, actions, batch_size)

        # Measure the Neck-to-Hip distance to adjust the input image scale
        length_neck2hip_mean = get_length_neck2hip_mean(smoothed)

        # Lists that store the 2D and 3D results
        poses3d_list = []
        poses2d_list = []

        # Lists used to compute the 2d/3d scale ratio
        length_2d_list = []
        length_3d_list = []

        # Pass 1: run the model on every frame and collect raw results.
        for n, (frame, xy) in enumerate(smoothed.items()):
            if frame % 200 == 0:
                logger.info("calc idx {0}, frame {1}".format(idx, frame))
            #if frame % 300 == 0:
            #    print(frame)

            # map list into np array
            joints_array = np.zeros((1, 36))
            joints_array[0] = [0 for i in range(36)]
            for o in range(len(joints_array[0])):
                #feed array with xy array
                joints_array[0][o] = xy[o]
            _data = joints_array[0]

            smoothedf.write(' '.join(map(str, _data)))
            smoothedf.write("\n")

            # mapping all body parts or 3d-pose-baseline format
            for i in range(len(order)):
                for j in range(2):
                    # create encoder input
                    enc_in[0][order[i] * 2 + j] = _data[i * 2 + j]
            for j in range(2):
                # Hip
                enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2
                # Thorax
                # The Thorax of 3dPoseBaseline sits slightly above the Neck of
                # OpenPose, so adjust it
                enc_in[0][13 * 2 + j] = 1.1 * enc_in[0][13 * 2 + j] - 0.1 * enc_in[0][0 * 2 + j]
                # Neck/Nose
                enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] + enc_in[0][13 * 2 + j]) / 2
                # Spine
                enc_in[0][12 * 2 + j] = (enc_in[0][0 * 2 + j] + enc_in[0][13 * 2 + j]) / 2

            # set spine
            # spine_x = enc_in[0][24]
            # spine_y = enc_in[0][25]

            # logger.debug("enc_in - 1")
            # logger.debug(enc_in)

            # Keep a reference to the unscaled 2d pose; enc_in is rebound to
            # new arrays below, so this array is not modified afterwards.
            poses2d = enc_in

            # Enlarge the input data:
            # scale the input so that Neck-to-Hip is about 110 pixels
            # (accuracy drops when the scale differs a lot from the training
            # data)
            input_scaling_factor = 110 / length_neck2hip_mean
            enc_in = enc_in * input_scaling_factor

            enc_in = enc_in[:, dim_to_use_2d]
            mu = data_mean_2d[dim_to_use_2d]
            stddev = data_std_2d[dim_to_use_2d]
            enc_in = np.divide((enc_in - mu), stddev)

            dp = 1.0
            dec_out = np.zeros((1, 48))
            dec_out[0] = [0 for i in range(48)]
            _, _, poses3d = model.step(sess, enc_in, dec_out, dp, isTraining=False)
            all_poses_3d = []
            enc_in = data_utils.unNormalizeData(enc_in, data_mean_2d, data_std_2d, dim_to_ignore_2d)
            poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
            gs1 = gridspec.GridSpec(1, 1)
            gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
            plt.axis('off')
            all_poses_3d.append(poses3d)
            enc_in, poses3d = map(np.vstack, [enc_in, all_poses_3d])
            subplot_idx, exidx = 1, 1

            poses3d_list.append(poses3d[0])
            poses2d_list.append(poses2d[0])
            length_2d_list.append(sum_length_xy(poses2d[0], 2))
            length_3d_list.append(sum_length_xy(poses3d[0], 3))

        # Compute the 3d position from the OpenPose (x, y) and the Baseline z

        # Scale ratio between the OpenPose output and the Baseline output:
        # estimated by comparing the summed bone lengths.
        # A moving average over the surrounding 91 frames stabilises it.
        move_ave_length_2d = calc_move_average(length_2d_list, 91)
        move_ave_length_3d = calc_move_average(length_3d_list, 91)
        move_ave_length_2d[move_ave_length_2d == 0] = 1  # avoid errors (division by zero)
        xy_scale = move_ave_length_3d / move_ave_length_2d

        # The following four values are provisional; the original author
        # expects small deviations not to affect accuracy.
        center_2d_x, center_2d_y = camera_center(
            openpose_output_dir)  # centre of the video (half of the video resolution)
        logger.info("center_2d_x {0}".format(center_2d_x))
        z_distance = 4000  # distance from camera to body (mm), used for the perspective effect
        camera_incline = 0  # downward tilt of the camera relative to horizontal (degrees)
        teacher_camera_incline = 13  # camera tilt of the training data (Human3.6M), 13 degrees downward on average

        # Pass 2: rebuild each pose from OpenPose x/y plus Baseline depth.
        for frame, (poses3d, poses2d) in enumerate(zip(poses3d_list, poses2d_list)):
            # To reduce error, compute the 3d position from the OpenPose
            # (x, y) and the 3dPoseBaseline z
            poses3d_op_xy = np.zeros(96)
            for i in [0, 1, 2, 3, 6, 7, 8, 13, 15, 17, 18, 19, 25, 26, 27]:
                # Difference from the Hip
                dy = poses3d[i * 3 + 1] - poses3d[0 * 3 + 1]
                dz = poses3d[i * 3 + 2] - poses3d[0 * 3 + 2]
                # Correct for the camera tilt of the training data
                dz = dz - dy * math.tan(
                    math.radians(teacher_camera_incline - camera_incline))
                # Magnification of x, y caused by perspective
                z_ratio = (z_distance + dz) / z_distance
                # x, y are computed from the OpenPose values
                poses3d_op_xy[i * 3] = (poses2d[i * 2] - center_2d_x) * xy_scale[frame] * z_ratio
                poses3d_op_xy[i * 3 + 1] = (poses2d[i * 2 + 1] - center_2d_y) * xy_scale[frame] * z_ratio
                # z is computed from the Baseline value
                poses3d_op_xy[i * 3 + 2] = dz

            # 12 (Spine), 14 (Neck/Nose) and 15 (Head) are not in the OpenPose
            # output, so compute them from the baseline (poses3d)
            for i in [12, 14, 15]:
                # 13 (Thorax) is usually detected, so use it as the reference
                # Differences
                dx = poses3d[i * 3] - poses3d[13 * 3]
                dy = poses3d[i * 3 + 1] - poses3d[13 * 3 + 1]
                dz = poses3d[i * 3 + 2] - poses3d[13 * 3 + 2]
                # Correct for the camera tilt of the training data
                dz = dz - dy * math.tan(
                    math.radians(teacher_camera_incline - camera_incline))
                # Derive x, y, z as offsets from 13 (Thorax)
                poses3d_op_xy[i * 3] = poses3d_op_xy[13 * 3] + dx
                poses3d_op_xy[i * 3 + 1] = poses3d_op_xy[13 * 3 + 1] + dy
                poses3d_op_xy[i * 3 + 2] = poses3d_op_xy[13 * 3 + 2] + dz

            # Pull the chin in slightly for MMD
            poses3d_op_xy[15 * 3] += 0.5 * (poses3d_op_xy[14 * 3] - poses3d_op_xy[13 * 3])
            poses3d_op_xy[15 * 3 + 1] += 0.5 * (poses3d_op_xy[14 * 3 + 1] - poses3d_op_xy[13 * 3 + 1])
            poses3d_op_xy[15 * 3 + 2] += 0.5 * (poses3d_op_xy[14 * 3 + 2] - poses3d_op_xy[13 * 3 + 2])

            poses3d_list[frame] = poses3d_op_xy

        logger.info("calc ground y")
        # Get the Y of the lowest leg joint (in this coordinate system a
        # larger Y value is lower)
        foot_joint_no = [1, 2, 3, 6, 7, 8]
        max_pos = []
        for frame, poses3d in enumerate(poses3d_list):
            max_pos.append(np.max([poses3d[i * 3 + 1] for i in foot_joint_no]))

        # Pass 3: move the joint touching the ground (usually the ankle) so
        # that it sits at Y = 0
        for frame, poses3d in enumerate(poses3d_list):
            # Take the positions of the surrounding 120 frames
            max_pos_slice = max_pos[int(np.max([0, frame - 60])):frame + 60]
            # Assume the foot is on the ground in more than half of the
            # frames and use the median as the grounded foot position
            ankle_pos = np.median(max_pos_slice)

            poses3d_ground = np.zeros(96)
            for i in range(len(data_utils.H36M_NAMES)):
                poses3d_ground[i * 3] = poses3d[i * 3]
                poses3d_ground[i * 3 + 1] = poses3d[i * 3 + 1] - ankle_pos
                poses3d_ground[i * 3 + 2] = poses3d[i * 3 + 2]

            poses3d_list[frame] = poses3d_ground

        # Pass 4: plot and write out every frame.
        for frame, (poses3d, poses2d) in enumerate(zip(poses3d_list, poses2d_list)):
            if frame % 200 == 0:
                logger.info("output frame {}".format(frame))

            # max = 0
            # min = 10000

            # logger.debug("enc_in - 2")
            # logger.debug(enc_in)

            # Swap the Y and Z components of each joint in place; the new Z
            # is the negated old Y.
            for j in range(32):
                tmp = poses3d[j * 3 + 2]
                poses3d[j * 3 + 2] = -poses3d[j * 3 + 1]
                poses3d[j * 3 + 1] = tmp
            #         if poses3d[i][j * 3 + 2] > max:
            #             max = poses3d[i][j * 3 + 2]
            #         if poses3d[i][j * 3 + 2] < min:
            #             min = poses3d[i][j * 3 + 2]

            # for i in range(poses3d.shape[0]):
            #     for j in range(32):
            #         poses3d[i][j * 3 + 2] = max - poses3d[i][j * 3 + 2] + min
            #         poses3d[i][j * 3] += (spine_x - 630)
            #         poses3d[i][j * 3 + 2] += (500 - spine_y)

            # Plot 3d predictions
            ax = plt.subplot(gs1[subplot_idx - 1], projection='3d')
            ax.view_init(18, 280)
            # logger.debug(np.min(poses3d))
            # if np.min(poses3d) < -1000 and before_pose is not None:
            #     poses3d = before_pose

            p3d = poses3d
            # logger.debug("poses3d")
            # logger.debug(poses3d)
            if frame == 0:
                # Remember the first three values of the first frame; they
                # are passed to viz.show3Dpose as root_xyz for every frame.
                first_xyz = [0, 0, 0]
                first_xyz[0], first_xyz[1], first_xyz[2] = p3d[0], p3d[1], p3d[2]

            if level[FLAGS.verbose] <= logging.INFO:
                viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71", add_labels=True, root_xyz=first_xyz)

                # The single-viewpoint image per frame is written only at
                # INFO verbosity (or more verbose)
                pngName = frame3d_dir + '/tmp_{0:012d}.png'.format(frame)
                plt.savefig(pngName)
                png_lib.append(imageio.imread(pngName))
                # before_pose = poses3d

            # Per-angle output for each frame is written only when debugging
            if level[FLAGS.verbose] == logging.DEBUG:
                for azim in [0, 45, 90, 135, 180, 225, 270, 315, 360]:
                    ax2 = plt.subplot(gs1[subplot_idx - 1], projection='3d')
                    ax2.view_init(18, azim)
                    viz.show3Dpose(p3d, ax2, lcolor="#FF0000", rcolor="#0000FF", add_labels=True, root_xyz=first_xyz)

                    pngName2 = frame3d_dir + '/tmp_{0:012d}_{1:03d}.png'.format(
                        frame, azim)
                    plt.savefig(pngName2)

            # Output the joint position information
            write_pos_data(poses3d, ax, posf)

        posf.close()
        smoothedf.close()

        # At INFO verbosity, generate the animated GIF
        if level[FLAGS.verbose] <= logging.INFO:
            logger.info(
                "creating Gif {0}/movie_smoothing.gif, please Wait!".format(
                    subdir))
            imageio.mimsave('{0}/movie_smoothing.gif'.format(subdir), png_lib, fps=FLAGS.gif_fps)

        # NOTE(review): the format argument is unused because the string has
        # no placeholder.
        logger.info("Done!".format(pngName))
def train():
    """Train a linear model for 3d pose estimation.

    Runs ``FLAGS.epochs`` epochs. Each epoch trains on all batches, prints
    the average training loss, evaluates on the test set (per action when
    ``FLAGS.evaluateActionWise`` is set, otherwise overall and per joint),
    logs the error summary and saves a checkpoint under ``train_dir``.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(
            device_count=device_count,
            allow_soft_placement=True)) as sess:

        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        # === This is the training loop ===
        step_time, loss = 0, 0
        # Continue the step count from a loaded checkpoint, if any.
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        current_epoch = 0
        log_every_n_batches = 100

        # range (not the Python 2-only xrange) keeps this loop runnable on
        # Python 3 like the rest of the file.
        for _ in range(FLAGS.epochs):
            current_epoch = current_epoch + 1

            # === Load training batches for one epoch ===
            encoder_inputs, decoder_outputs = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame, training=True)
            nbatches = len(encoder_inputs)
            print("There are {0} train batches".format(nbatches))
            start_time, loss = time.time(), 0.

            # === Loop through all the training batches ===
            for i in range(nbatches):

                if (i + 1) % log_every_n_batches == 0:
                    # Print progress every log_every_n_batches batches
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, i + 1, nbatches), end="")

                enc_in, dec_out = encoder_inputs[i], decoder_outputs[i]
                step_loss, loss_summary, lr_summary, _ = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining=True)

                if (i + 1) % log_every_n_batches == 0:
                    # Log and print progress every log_every_n_batches batches
                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    step_time = (time.time() - start_time)
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(
                        1000 * step_time / log_every_n_batches))

                loss += step_loss
                current_step += 1
                # === end looping through training batches ===

            loss = loss / nbatches
            print("=============================\n"
                  "Global step:         %d\n"
                  "Learning rate:       %.2e\n"
                  "Train loss avg:      %.4f\n"
                  "=============================" %
                  (model.global_step.eval(), model.learning_rate.eval(), loss))
            # === End training for an epoch ===

            # === Testing after this epoch ===
            if FLAGS.evaluateActionWise:
                # Table header: both titles centred and padded with '='.
                print("{0:=^12} {1:=^6}".format("Action", "mm"))

                cum_err = 0
                for action in actions:
                    print("{0:<12} ".format(action), end="")

                    # Get 2d and 3d testing data for this action
                    action_test_set_2d = get_action_subset(test_set_2d, action)
                    action_test_set_3d = get_action_subset(test_set_3d, action)
                    encoder_inputs, decoder_outputs = model.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False)

                    act_err, _, step_time, loss = evaluate_batches(
                        sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                        dim_to_ignore_3d, data_mean_2d, data_std_2d,
                        dim_to_use_2d, dim_to_ignore_2d, current_step,
                        encoder_inputs, decoder_outputs)
                    cum_err = cum_err + act_err

                    print("{0:>6.2f}".format(act_err))

                summaries = sess.run(
                    model.err_mm_summary,
                    {model.err_mm: float(cum_err / float(len(actions)))})
                model.test_writer.add_summary(summaries, current_step)
                print("{0:<12} {1:>6.2f}".format(
                    "Average", cum_err / float(len(actions))))
                print("{0:=^19}".format(''))
            else:
                n_joints = 17 if not (FLAGS.predict_14) else 14
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    test_set_2d, test_set_3d, FLAGS.camera_frame,
                    training=False)

                total_err, joint_err, step_time, loss = evaluate_batches(
                    sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d, data_mean_2d, data_std_2d,
                    dim_to_use_2d, dim_to_ignore_2d, current_step,
                    encoder_inputs, decoder_outputs, current_epoch)

                print("=============================\n"
                      "Step-time (ms):      %.4f\n"
                      "Val loss avg:        %.4f\n"
                      "Val error avg (mm):  %.2f\n"
                      "=============================" %
                      (1000 * step_time, loss, total_err))

                for i in range(n_joints):
                    # Right-aligned, minimum width 5, 2 decimal places.
                    print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                        i + 1, joint_err[i]))
                print("=============================")

                # Log the error to tensorboard
                summaries = sess.run(model.err_mm_summary,
                                     {model.err_mm: total_err})
                model.test_writer.add_summary(summaries, current_step)

            # Save the model
            print("Saving the model... ", end="")
            start_time = time.time()
            model.saver.save(sess, os.path.join(train_dir, 'checkpoint'),
                             global_step=current_step)
            print("done in {0:.2f} ms".format(
                1000 * (time.time() - start_time)))

            # Reset global time and loss
            step_time, loss = 0, 0

            sys.stdout.flush()
def test():
    """Evaluate the model action by action and log the errors in mm.

    Loads the 3d data and the 2d inputs selected by ``FLAGS.use_sh``,
    builds the model and, when ``FLAGS.evaluateActionWise`` is set, logs one
    error per action followed by their average. Nothing is evaluated when
    that flag is off.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Camera parameters for every Human3.6M subject used here
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # 3d data
    data_3d = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions, test_root_positions) = data_3d

    # 2d data: stacked hourglass predictions if use_sh, otherwise 2d data
    # created from the cameras
    if FLAGS.use_sh:
        data_2d = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    else:
        data_2d = data_utils.create_2d_data(actions, FLAGS.data_dir, rcams)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_2d

    # Avoid using the GPU if requested
    if FLAGS.use_cpu:
        device_count = {"GPU": 0}
    else:
        device_count = {"GPU": 1}
    session_config = tf.ConfigProto(device_count=device_count,
                                    allow_soft_placement=True)

    with tf.Session(config=session_config) as sess:
        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)

        # Continue the step count from a loaded checkpoint, if any.
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        if not FLAGS.evaluateActionWise:
            return

        # Table header: both titles centred and padded with '='.
        logger.info("{0:=^12} {1:=^6}".format("Action", "mm"))

        per_action_errors = []
        for action in actions:
            # 2d and 3d testing data for this action
            subset_2d = get_action_subset(test_set_2d, action)
            subset_3d = get_action_subset(test_set_3d, action)
            enc_batches, dec_batches, _ = model.get_all_batches(
                subset_2d, subset_3d, FLAGS.camera_frame, training=False)

            act_err, _, _ = evaluate_batches(
                sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                dim_to_ignore_3d, data_mean_2d, data_std_2d, dim_to_use_2d,
                dim_to_ignore_2d, current_step, enc_batches, dec_batches)

            per_action_errors.append(act_err)
            logger.info('{0:<12} {1:>6.2f}'.format(action, act_err))

        # sum() starts at 0 and adds left to right, i.e. the same
        # accumulation order as a running total.
        mean_err = sum(per_action_errors) / float(len(actions))

        summaries = sess.run(model.err_mm_summary,
                             {model.err_mm: float(mean_err)})
        model.test_writer.add_summary(summaries, current_step)
        logger.info('{0:<12} {1:>6.2f}'.format("Average", mean_err))
        logger.info('{0:=^19}'.format(''))
def main(_):
    """Convert smoothed OpenPose 2d joints into 3d poses and write them out.

    Creates a timestamped output directory next to the OpenPose output, runs
    every smoothed frame through the 3d-pose-baseline model, and writes
    ``pos.txt``, ``smoothed.txt``, per-frame PNGs and (at INFO verbosity) an
    animated GIF.

    The single positional argument is ignored.
    """
    # Timestamp for output naming
    now_str = "{0:%Y%m%d_%H%M%S}".format(datetime.datetime.now())

    logger.debug("FLAGS.person_idx={0}".format(FLAGS.person_idx))

    # Create the "<date>_idx<NN>" output directory
    subdir = '{0}/{1}_3d_{2}_idx{3:02d}'.format(
        os.path.dirname(openpose_output_dir),
        os.path.basename(openpose_output_dir), now_str, FLAGS.person_idx)
    os.makedirs(subdir)

    frame3d_dir = "{0}/frame3d".format(subdir)
    os.makedirs(frame3d_dir)

    # Joint position file
    posf = open(subdir + '/pos.txt', 'w')

    # Normalised OpenPose position file
    smoothedf = open(subdir + '/smoothed.txt', 'w')

    idx = FLAGS.person_idx - 1
    # The last argument is a boolean switch keyed on the raw verbosity flag.
    # The previous test compared a logging level (level[FLAGS.verbose]) with
    # 3, which can never match the logging constants used further down.
    smoothed = openpose_utils.read_openpose_json(openpose_output_dir, idx,
                                                 FLAGS.verbose == 3)
    logger.info("reading and smoothing done. start feeding 3d-pose-baseline")
    logger.debug(smoothed)
    plt.figure(2)
    smooth_curves_plot = show_anim_curves(smoothed, plt)
    pngName = subdir + '/smooth_plot.png'
    smooth_curves_plot.savefig(pngName)

    enc_in = np.zeros((1, 64))
    enc_in[0] = [0 for i in range(64)]

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # Last accepted pose; reused when a frame looks broken (see below).
    before_pose = None
    device_count = {"GPU": 1}
    png_lib = []
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        for n, (frame, xy) in enumerate(smoothed.items()):
            logger.info("calc idx {0}, frame {1}".format(idx, frame))

            # map list into np array
            joints_array = np.zeros((1, 36))
            joints_array[0] = [0 for i in range(36)]
            for o in range(len(joints_array[0])):
                # feed array with xy array
                joints_array[0][o] = xy[o]
            _data = joints_array[0]

            smoothedf.write(' '.join(map(str, _data)))
            smoothedf.write("\n")

            # mapping all body parts or 3d-pose-baseline format
            for i in range(len(order)):
                for j in range(2):
                    # create encoder input
                    enc_in[0][order[i] * 2 + j] = _data[i * 2 + j]
            for j in range(2):
                # Hip
                enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] +
                                        enc_in[0][6 * 2 + j]) / 2
                # Neck/Nose
                enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] +
                                         enc_in[0][12 * 2 + j]) / 2
                # Thorax
                enc_in[0][13 * 2 + j] = (2 * enc_in[0][12 * 2 + j] -
                                         enc_in[0][14 * 2 + j])

            # set spine
            spine_x = enc_in[0][24]
            spine_y = enc_in[0][25]

            # Normalise the input with the training statistics.
            enc_in = enc_in[:, dim_to_use_2d]
            mu = data_mean_2d[dim_to_use_2d]
            stddev = data_std_2d[dim_to_use_2d]
            enc_in = np.divide((enc_in - mu), stddev)

            dp = 1.0
            dec_out = np.zeros((1, 48))
            dec_out[0] = [0 for i in range(48)]
            _, _, poses3d = model.step(sess, enc_in, dec_out, dp,
                                       isTraining=False)
            all_poses_3d = []
            enc_in = data_utils.unNormalizeData(enc_in, data_mean_2d,
                                                data_std_2d, dim_to_ignore_2d)
            poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d,
                                                 data_std_3d,
                                                 dim_to_ignore_3d)
            gs1 = gridspec.GridSpec(1, 1)
            gs1.update(wspace=-0.00,
                       hspace=0.05)  # set the spacing between axes.
            plt.axis('off')
            all_poses_3d.append(poses3d)
            enc_in, poses3d = map(np.vstack, [enc_in, all_poses_3d])
            subplot_idx, exidx = 1, 1

            # Swap the Y and Z components of every joint and track the range
            # of the new Z values (named z_max / z_min so the max / min
            # builtins are not shadowed).
            z_max = 0
            z_min = 10000
            for i in range(poses3d.shape[0]):
                for j in range(32):
                    tmp = poses3d[i][j * 3 + 2]
                    poses3d[i][j * 3 + 2] = poses3d[i][j * 3 + 1]
                    poses3d[i][j * 3 + 1] = tmp
                    if poses3d[i][j * 3 + 2] > z_max:
                        z_max = poses3d[i][j * 3 + 2]
                    if poses3d[i][j * 3 + 2] < z_min:
                        z_min = poses3d[i][j * 3 + 2]

            # Mirror Z inside its [z_min, z_max] range, then shift X and Z by
            # the 2d spine position.
            for i in range(poses3d.shape[0]):
                for j in range(32):
                    poses3d[i][j * 3 + 2] = z_max - poses3d[i][j * 3 + 2] + z_min
                    poses3d[i][j * 3] += (spine_x - 630)
                    poses3d[i][j * 3 + 2] += (500 - spine_y)

            # Plot 3d predictions
            ax = plt.subplot(gs1[subplot_idx - 1], projection='3d')
            ax.view_init(18, 280)
            logger.debug(np.min(poses3d))
            # A value below -1000 is treated as a broken frame; fall back to
            # the previous pose when one exists.
            if np.min(poses3d) < -1000 and before_pose is not None:
                poses3d = before_pose

            p3d = poses3d

            if level[FLAGS.verbose] == logging.INFO:
                viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71",
                               add_labels=True)

                # The single-viewpoint image per frame is written only at
                # INFO verbosity
                pngName = frame3d_dir + '/tmp_{0:012d}.png'.format(frame)
                plt.savefig(pngName)
                png_lib.append(imageio.imread(pngName))

            # Recorded on every frame, independent of verbosity, so the
            # broken-frame fallback above is available at any log level.
            before_pose = poses3d

            # Per-angle output for each frame is written only when debugging
            if level[FLAGS.verbose] == logging.DEBUG:
                for azim in [0, 45, 90, 135, 180, 225, 270, 315, 360]:
                    ax2 = plt.subplot(gs1[subplot_idx - 1], projection='3d')
                    ax2.view_init(18, azim)
                    viz.show3Dpose(p3d, ax2, lcolor="#FF0000",
                                   rcolor="#0000FF", add_labels=True)

                    pngName2 = frame3d_dir + '/tmp_{0:012d}_{1:03d}.png'.format(
                        frame, azim)
                    plt.savefig(pngName2)

            # Output the joint position information
            write_pos_data(poses3d, ax, posf)

        posf.close()
        # Also close the smoothed-positions file so its buffered lines are
        # flushed to disk (it was previously left open).
        smoothedf.close()

        # At INFO verbosity, generate the animated GIF
        if level[FLAGS.verbose] == logging.INFO:
            logger.info(
                "creating Gif {0}/movie_smoothing.gif, please Wait!".format(
                    subdir))
            imageio.mimsave('{0}/movie_smoothing.gif'.format(subdir), png_lib,
                            fps=FLAGS.gif_fps)
        logger.info("Done!")
def sample():
    """Get samples from a model and visualize them.

    For every 'SittingDown' sequence in the 2d test set this runs the model
    batch by batch, un-normalizes the 2d input, the 3d ground truth and each
    3d hypothesis (``model.num_models`` of them), optionally converts the 3d
    data from camera to (root-centered) world coordinates, and saves up to 20
    comparison figures under ``<FLAGS.train_dir>/samples_sh``.
    """
    path = '{}/samples_sh'.format(FLAGS.train_dir)
    if not os.path.exists(path):
        os.makedirs(path)

    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d, _) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        # The model output is reshaped to [..., model.num_models] below, so
        # this is the number of pose hypotheses per frame.
        n_models = model.num_models

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d

            # choose SittingDown action to visualize
            if b != 'SittingDown':
                continue

            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            key3d = (subj, b, fname[:-3]) if (
                fname.endswith('-sh')) and FLAGS.camera_frame else key3d

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # Split into about-same-size batches. np.array_split rejects zero
            # sections, so sequences shorter than batch_size get one batch.
            n_sections = max(1, n2d // batch_size)
            enc_in = np.array_split(enc_in, n_sections)
            dec_out = np.array_split(dec_out, n_sections)

            # store all pose hypotheses, one list of batches per hypothesis
            pose_3d_mdm = [[] for _ in range(n_models)]

            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                _, _, out_all_components = model.step(
                    sess, enc_in[bidx], dec_out[bidx], dp, isTraining=False)

                # denormalize the input 2d pose, ground truth 3d pose as well
                # as the 3d pose hypotheses
                out_all_components = np.reshape(
                    out_all_components,
                    [-1, model.HUMAN_3D_SIZE + 2, model.num_models])
                out_mean = out_all_components[:, :model.HUMAN_3D_SIZE, :]

                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d, dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d, dim_to_ignore_3d)

                for m in range(n_models):
                    pose_3d_mdm[m].append(
                        data_utils.unNormalizeData(out_mean[:, :, m],
                                                   data_mean_3d, data_std_3d,
                                                   dim_to_ignore_3d))

            # Put all the poses together
            enc_in, dec_out = map(np.vstack, [enc_in, dec_out])
            pose_3d_mdm = [np.vstack(batches) for batches in pose_3d_mdm]

            # Defaults for the output file name. They are overwritten below
            # when the data is in camera coordinates; without them the
            # savefig call would hit undefined names.
            action, scam_idx = b, None

            # Convert back to world coordinates
            if FLAGS.camera_frame:
                N_CAMERAS = 4
                N_JOINTS_H36M = 32

                # Add global position back
                root_offset = np.tile(test_root_positions[key3d],
                                      [1, N_JOINTS_H36M])
                dec_out = dec_out + root_offset
                pose_3d_mdm = [pose + root_offset for pose in pose_3d_mdm]

                # Load the appropriate camera
                subj, action, sname = key3d
                cname = sname.split('.')[1]  # <-- camera name
                # cams of this subject
                scams = {(subj, c + 1): rcams[(subj, c + 1)]
                         for c in range(N_CAMERAS)}
                # index of camera used
                scam_idx = [scams[(subj, c + 1)][-1]
                            for c in range(N_CAMERAS)].index(cname)
                the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                R, T, _, _, _, _, name = the_cam
                assert name == cname

                def cam2world_centered(data_3d_camframe):
                    """Rotate/translate to world frame and subtract the root."""
                    data_3d_worldframe = cameras.camera_to_world_frame(
                        data_3d_camframe.reshape((-1, 3)), R, T)
                    data_3d_worldframe = data_3d_worldframe.reshape(
                        (-1, N_JOINTS_H36M * 3))
                    # subtract root translation
                    return data_3d_worldframe - np.tile(
                        data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                # Apply inverse rotation and translation
                dec_out = cam2world_centered(dec_out)
                pose_3d_mdm = [cam2world_centered(pose)
                               for pose in pose_3d_mdm]

            # sample some results to visualize
            np.random.seed(42)
            idx = np.random.permutation(enc_in.shape[0])
            enc_in, dec_out = enc_in[idx, :], dec_out[idx, :]
            pose_3d_mdm = [pose[idx, :] for pose in pose_3d_mdm]

            # exidx starts at 1 (kept so file names are unchanged), so at most
            # n_frames - 1 examples can be drawn without running off the end.
            exidx = 1
            nsamples = 20
            for _ in range(min(nsamples, enc_in.shape[0] - 1)):
                fig = plt.figure(figsize=(20, 5))

                # one row: 2d input, 3d ground truth, then every hypothesis
                gs1 = gridspec.GridSpec(1, n_models + 2)
                gs1.update(wspace=-0.00, hspace=0.05)  # spacing between axes
                plt.axis('off')

                # Plot 2d pose
                ax1 = plt.subplot(gs1[0])
                viz.show2Dpose(enc_in[exidx, :], ax1)
                ax1.invert_yaxis()

                # Plot 3d gt
                ax2 = plt.subplot(gs1[1], projection='3d')
                viz.show3Dpose(dec_out[exidx, :], ax2)

                # Plot 3d pose hypotheses
                for m in range(n_models):
                    ax3 = plt.subplot(gs1[m + 2], projection='3d')
                    viz.show3Dpose(pose_3d_mdm[m][exidx], ax3,
                                   lcolor="#9b59b6", rcolor="#2ecc71")

                plt.savefig('{}/sample_{}_{}_{}_{}.png'.format(
                    path, subj, action, scam_idx, exidx))
                plt.close(fig)
                exidx = exidx + 1
def train(self, config):
    """Build the optimizers and run the adversarial training loop.

    Sets up a piecewise-constant learning-rate schedule, Adam optimizers for
    the generator (``self.g_loss``) and the second network (``self.p_loss``),
    loads the 2d pose data, then for ``config.epoch`` epochs of 500 iterations
    performs 5 ``p`` updates followed by 1 generator update per iteration.
    Every 500 steps validation losses are averaged over ``self.val_nbatch``
    batches, and every 4000 steps a checkpoint is saved.

    Args:
        config: run configuration; the attributes read here are
            ``learning_rate``, ``check_grad``, ``load``, ``cameras_path``,
            ``dataset``, ``batch_size``, ``sample_size``, ``seq_length``,
            ``epoch``, ``check_input``, ``pose_dir``, ``val_save``,
            ``checkpoint_dir`` and ``global_counter``.
    """
    # Define some parameters
    batch_idxs = 500
    decay_rate = 0.5  # empirical
    epochs_to_decay = 50

    # Halve the learning rate every `epochs_to_decay` epochs, five times.
    boundaries = []
    lr_values = [config.learning_rate]
    for exp in range(1, 6):
        lr_values.append(config.learning_rate * (decay_rate)**exp)
        boundaries.append(batch_idxs * exp * epochs_to_decay)
    print(boundaries, lr_values)

    self.learning_rate = tf.train.piecewise_constant(self.global_step,
                                                     boundaries, lr_values)
    self.lr_sum = tf.summary.scalar("lr", self.learning_rate)

    # TODO: check gradient magnitude here
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                   scope='generator/first_frame_generator')
    with tf.control_dependencies(update_ops):
        opt = tf.train.AdamOptimizer(self.learning_rate, beta1=0.5, beta2=0.9)
        g_grad = opt.compute_gradients(self.g_loss, var_list=self.g_vars)
        g_optim = opt.minimize(self.g_loss, var_list=self.g_vars)

    # Optional gradient histograms for the variables under the given scope.
    grad_sum = []
    if config.check_grad:
        layer_to_check = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope=config.check_grad)
        for grad, var in g_grad:
            if var in layer_to_check:
                grad_sum.append(tf.summary.histogram(grad.name, grad))

    p_optim = tf.train.AdamOptimizer(self.learning_rate, beta1=0.5, beta2=0.9) \
        .minimize(self.p_loss, var_list=self.p_vars)

    self.sess.run(tf.global_variables_initializer())

    self.g_sum = tf.summary.merge([self.z_sum, self.g_loss_sum] + grad_sum)
    self.p_sum = tf.summary.merge(
        [self.z_sum, self.p_loss_sum, self.p_grad_norm, self.p_gp_loss_sum]
        + self.p_vars_sum)

    counter = 1
    start_time = time.time()

    if config.load:
        self.load(self.checkpoint_dir)
        print("""

======
An existing model was found in the checkpoint directory.
If you want to train a new model from scratch,
delete the checkpoint directory or specify a different
--checkpoint_dir argument.
======

""")
    else:
        print("""

======
An existing model was not found in the checkpoint directory.
Initializing a new one.
======

""")

    # ======load data==================
    actions = data_utils.define_actions(self.actions)
    print('actions',actions)

    # Load camera parameters
    SUBJECT_IDS = [1,5,6,7,8,9,11]
    rcams = cameras.load_cameras(config.cameras_path, SUBJECT_IDS)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.create_2d_data_corrected(
         actions, config.dataset, rcams)
    print( "done reading and normalizing data." )

    tr_loader = self.h36m_data_loader(config.batch_size, train_set_2d,
                                      config.seq_length, actions)
    te_loader = self.h36m_data_loader(config.sample_size, test_set_2d,
                                      config.seq_length, actions)

    for epoch in xrange(config.epoch):
        for idx in xrange(0, batch_idxs):
            batch_pose, batch_class = next(tr_loader)
            batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
                .astype(np.float32)

            # Dump the very first training batch so the inputs can be inspected.
            if config.check_input and epoch == 0 and idx == 0:
                for i in range(batch_pose.shape[0]):
                    draw_pose(batch_pose[i],
                              os.path.join(config.pose_dir, 'input',
                                           'class_%02d' % int(np.where(batch_class[i])[0])),
                              '%d.jpg' % (i),
                              data_mean_2d, data_std_2d, dim_to_ignore_2d)

            # NOTE(review): the original computed `n_iter` (25 while
            # epoch < 25, else 5) but never used it; the loop below always
            # ran 5 times. That behavior is preserved -- confirm whether
            # `n_iter` was meant to drive this loop.
            for _ in range(5):
                _, p_loss = self.sess.run([p_optim, self.p_loss],
                    feed_dict={ self.z: batch_z,
                                self.kp: 0.5,
                                self.p_kp: 0.5,
                                self.pose_vec: batch_pose,
                                self.class_vec: batch_class,
                                self.global_step: counter,
                                self.g_is_training: True})

            # Update G network
            _, g_loss = self.sess.run([g_optim, self.g_loss],
                feed_dict={ self.z: batch_z,
                            self.kp: 0.5,
                            self.p_kp: 0.5,
                            self.class_vec: batch_class,
                            self.global_step: counter,
                            self.g_is_training: True})

            # Re-evaluate both losses and the summaries with dropout disabled.
            errG, errP, g_sum_str, p_sum_str, lr_sum = \
                self.sess.run([self.g_loss, self.p_loss, self.g_sum, self.p_sum, self.lr_sum],
                              {self.z: batch_z,
                               self.pose_vec: batch_pose,
                               self.kp: 1.0,
                               self.p_kp: 1.0,
                               self.class_vec: batch_class,
                               self.global_step: counter,
                               self.g_is_training: False})

            counter += 1
            print("\rEpoch: [%2d] [%4d/%4d] time: %4.4f, p_loss: %.8f, g_loss: %.8f" \
                % (epoch, idx, batch_idxs, time.time() - start_time, errP, errG), end='')

            if np.mod(counter, batch_idxs) == 1:
                g_loss_s = 0.0
                p_loss_s = 0.0
                for b in range(self.val_nbatch):
                    sample_z = np.random.uniform(-1, 1, size=(config.sample_size , self.z_dim))
                    sample_pose, sample_class = next(te_loader)
                    samples, g_loss, p_loss = self.sess.run(
                        [self.G_first, self.g_loss, self.p_loss],
                        feed_dict={self.z: sample_z,
                                   self.kp: 1.0,
                                   self.p_kp: 1.0,
                                   self.pose_vec: sample_pose,
                                   self.class_vec: sample_class,
                                   self.g_is_training: False}
                    )
                    g_loss_s += g_loss
                    p_loss_s += p_loss

                    # Only the first validation batch is rendered to disk.
                    if config.val_save and b < 1:
                        for i in range(samples.shape[0]):
                            draw_pose(samples[i],
                                      os.path.join(config.pose_dir,
                                                   'train_%02d_%04d' % (epoch, idx),
                                                   'class_%02d' % int(np.where(sample_class[i])[0])),
                                      'recons_%d.jpg' % (i),
                                      data_mean_2d, data_std_2d, dim_to_ignore_2d)

                # Guard the averages so val_nbatch == 0 does not divide by zero.
                n_val = max(self.val_nbatch, 1)
                g_loss_s /= n_val
                p_loss_s /= n_val
                # Report the averaged p loss (the original printed the last
                # batch's value next to the averaged g loss).
                print("\n[Sample] g_loss: %.8f, p_loss: %.8f" % (g_loss_s, p_loss_s))

            if np.mod(counter, 4000) == 1:
                self.save(config.checkpoint_dir, counter + config.global_counter)
def main(_):
    """Lift smoothed OpenPose 2d keypoints to 3d frame by frame and build a GIF.

    Reads the smoothed per-frame keypoints, plots their curves, then for each
    frame maps the 36 OpenPose values into the 64-wide encoder input, runs
    the model, post-processes the predicted pose for display, saves one PNG
    per frame under ``png/`` and finally writes ``png/movie_smoothing.gif``.
    """
    smoothed = read_openpose_json()
    logger.info("reading and smoothing done. start feeding 3d-pose-baseline")
    plt.figure(2)
    smooth_curves_plot = show_anim_curves(smoothed, plt)
    pngName = 'png/smooth_plot.png'
    smooth_curves_plot.savefig(pngName)

    enc_in = np.zeros((1, 64))

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Normalization statistics restricted to the dimensions the model uses.
    mu = data_mean_2d[dim_to_use_2d]
    stddev = data_std_2d[dim_to_use_2d]

    device_count = {"GPU": 1}
    png_lib = []
    # Last plotted pose; None until a frame has been processed, so an outlier
    # on the very first frame cannot hit an unbound variable.
    before_pose = None
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        for frame, xy in smoothed.items():
            logger.info("calc frame {0}".format(frame))

            # the first 36 values are the (x, y) pairs fed to the model
            _data = np.zeros(36)
            for o in range(36):
                _data[o] = xy[o]

            # mapping all body parts for 3d-pose-baseline format
            for i, joint in enumerate(order):
                enc_in[0, joint * 2:joint * 2 + 2] = _data[i * 2:i * 2 + 2]
            for j in range(2):
                # Hip
                enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2
                # Neck/Nose
                enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] + enc_in[0][12 * 2 + j]) / 2
                # Thorax
                enc_in[0][13 * 2 + j] = 2 * enc_in[0][12 * 2 + j] - enc_in[0][14 * 2 + j]

            # set spine
            spine_x = enc_in[0][24]
            spine_y = enc_in[0][25]

            # Keep the 64-wide buffer intact while the model runs; only the
            # normalized slice goes to the network.
            net_in = np.divide(enc_in[:, dim_to_use_2d] - mu, stddev)

            dp = 1.0
            dec_out = np.zeros((1, 48))
            _, _, poses3d = model.step(sess, net_in, dec_out, dp, isTraining=False)

            enc_in = data_utils.unNormalizeData(net_in, data_mean_2d,
                                                data_std_2d, dim_to_ignore_2d)
            poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d,
                                                 data_std_3d, dim_to_ignore_3d)
            gs1 = gridspec.GridSpec(1, 1)
            gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
            plt.axis('off')
            enc_in = np.vstack(enc_in)
            poses3d = np.vstack([poses3d])

            # View each row as 32 joints x (x, y, z); vstack returned a fresh
            # contiguous array, so this reshape is a view and edits land in
            # poses3d.
            joints = poses3d.reshape(-1, 32, 3)
            # swap the y and z columns
            joints[:, :, [1, 2]] = joints[:, :, [2, 1]]
            # same starting bounds as the original running max/min (0, 10000)
            z_max = max(0, joints[:, :, 2].max())
            z_min = min(10000, joints[:, :, 2].min())
            # mirror z inside [z_min, z_max], then shift by the 2d spine position
            joints[:, :, 2] = z_max - joints[:, :, 2] + z_min + (500 - spine_y)
            joints[:, :, 0] += (spine_x - 630)

            # Plot 3d predictions
            ax = plt.subplot(gs1[0], projection='3d')
            ax.view_init(18, -70)
            logger.debug(np.min(poses3d))
            # Fall back to the previous pose when the prediction is an outlier.
            if np.min(poses3d) < -1000 and before_pose is not None:
                poses3d = before_pose

            p3d = poses3d
            logger.debug(poses3d)
            viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71")

            pngName = 'png/test_{0}.png'.format(str(frame))
            plt.savefig(pngName)
            png_lib.append(imageio.imread(pngName))
            before_pose = poses3d

    logger.info("creating Gif png/movie_smoothing.gif, please Wait!")
    imageio.mimsave('png/movie_smoothing.gif', png_lib, fps=FLAGS.gif_fps)
    logger.info("Done!")
def train():
    """Train a linear model for 3d pose estimation.

    Loads the 3d data and the matching 2d inputs (stacked-hourglass
    detections when ``FLAGS.use_sh`` is set, otherwise created from the
    ground truth and cameras), then for ``FLAGS.epochs`` epochs runs every
    training batch through ``model.step``, evaluates on the test set (per
    action or overall), logs to the model's summary writers and saves a
    checkpoint.
    """
    actions = data_utils.define_actions(FLAGS.action)  # list of action names

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]  # 1,5,6,7,8 for train, 9,11 for test
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    # (original author's note: train_set_3d is a dict keyed by
    # (subject, action, filename) holding pose arrays of shape (n, 96))
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    # test_set_2d is a dict of sequences (it is passed to get_action_subset
    # below), so it has no .shape; report the number of sequences instead.
    print("done reading and normalizing data; {0} test sequences".format(
        len(test_set_2d)))

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:

        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)  # expose the graph to tensorboard
        print("Model created")

        # === This is the training loop ===
        step_time, loss = 0, 0
        # resume the step counter when continuing from a checkpoint
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        current_epoch = 0
        log_every_n_batches = 100

        for _ in xrange(FLAGS.epochs):
            current_epoch = current_epoch + 1

            # === Load training batches for one epoch ===
            # lists of per-batch input and target arrays
            encoder_inputs, decoder_outputs = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame, training=True)
            nbatches = len(encoder_inputs)
            print("There are {0} train batches".format(nbatches))
            start_time, loss = time.time(), 0.

            # === Loop through all the training batches ===
            for i in range(nbatches):
                should_log = (i + 1) % log_every_n_batches == 0

                if should_log:
                    # Print progress every log_every_n_batches batches
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, i + 1, nbatches), end="")

                enc_in, dec_out = encoder_inputs[i], decoder_outputs[i]
                step_loss, loss_summary, lr_summary, _ = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining=True)

                if should_log:
                    # Log and print progress every log_every_n_batches batches
                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    step_time = (time.time() - start_time)
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(
                        1000 * step_time / log_every_n_batches))

                loss += step_loss
                current_step += 1
                # === end looping through training batches ===

            loss = loss / nbatches
            print("=============================\n"
                  "Global step:         %d\n"
                  "Learning rate:       %.2e\n"
                  "Train loss avg:      %.4f\n"
                  "=============================" %
                  (model.global_step.eval(), model.learning_rate.eval(), loss))
            # === End training for an epoch ===

            # === Testing after this epoch ===
            if FLAGS.evaluateActionWise:

                print("{0:=^12} {1:=^6}".format("Action", "mm"))  # header row

                cum_err = 0
                for action in actions:

                    print("{0:<12} ".format(action), end="")
                    # Get 2d and 3d testing data for this action, already
                    # split into batches
                    action_test_set_2d = get_action_subset(test_set_2d, action)
                    action_test_set_3d = get_action_subset(test_set_3d, action)
                    encoder_inputs, decoder_outputs = model.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False)

                    act_err, _, step_time, loss = evaluate_batches(
                        sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                        dim_to_ignore_3d, data_mean_2d, data_std_2d,
                        dim_to_use_2d, dim_to_ignore_2d, current_step,
                        encoder_inputs, decoder_outputs)
                    cum_err = cum_err + act_err

                    print("{0:>6.2f}".format(act_err))

                summaries = sess.run(
                    model.err_mm_summary,
                    {model.err_mm: float(cum_err / float(len(actions)))})
                model.test_writer.add_summary(summaries, current_step)
                print("{0:<12} {1:>6.2f}".format(
                    "Average", cum_err / float(len(actions))))
                print("{0:=^19}".format(''))

            else:

                n_joints = 17 if not (FLAGS.predict_14) else 14
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    test_set_2d, test_set_3d, FLAGS.camera_frame,
                    training=False)

                total_err, joint_err, step_time, loss = evaluate_batches(
                    sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d, data_mean_2d, data_std_2d,
                    dim_to_use_2d, dim_to_ignore_2d, current_step,
                    encoder_inputs, decoder_outputs, current_epoch)

                print("=============================\n"
                      "Step-time (ms):      %.4f\n"
                      "Val loss avg:        %.4f\n"
                      "Val error avg (mm):  %.2f\n"
                      "=============================" %
                      (1000 * step_time, loss, total_err))

                for i in range(n_joints):
                    # 6 spaces, right-aligned, 5 decimal places
                    print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                        i + 1, joint_err[i]))
                print("=============================")

                # Log the error to tensorboard
                summaries = sess.run(model.err_mm_summary,
                                     {model.err_mm: total_err})
                model.test_writer.add_summary(summaries, current_step)

            # Save the model
            print("Saving the model... ", end="")
            start_time = time.time()
            model.saver.save(sess,
                             os.path.join(train_dir, 'checkpoint'),
                             global_step=current_step)
            print("done in {0:.2f} ms".format(1000 * (time.time() - start_time)))

            # Reset global time and loss
            step_time, loss = 0, 0

            sys.stdout.flush()
def hankgogo(gogodata, gogodatafake):
    """Predict a 3d pose for ``gogodata``, save the model and show the pose.

    Args:
        gogodata: input handed to ``model.step``.
        gogodatafake: accepted for interface compatibility; not read here.
    """
    action_names = data_utils.define_actions(FLAGS.action)

    subject_ids = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, subject_ids)

    # Load 3d data and create the 2d projections. Only the 3d statistics are
    # needed afterwards, but both loaders are still called as before.
    stats_3d = data_utils.read_3d_data(action_names, FLAGS.data_dir,
                                       FLAGS.camera_frame, rcams,
                                       FLAGS.predict_14)
    (_train_3d, _test_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     _dim_to_use_3d, _train_roots, _test_roots) = stats_3d

    (_train_2d, _test_2d, _mean_2d, _std_2d, _ignore_2d,
     _use_2d) = data_utils.create_2d_data(action_names, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    if FLAGS.use_cpu:
        device_count = {"GPU": 0}
    else:
        device_count = {"GPU": 1}

    session_config = tf.ConfigProto(device_count=device_count)
    with tf.Session(config=session_config) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model_my(sess, action_names, 1)  # batch size of one
        print("Model loaded")

        prediction = model.step(sess, gogodata, isTraining=False)
        poses3d = data_utils.unNormalizeData(prediction, data_mean_3d,
                                             data_std_3d, dim_to_ignore_3d)
        model.saver.save(sess, os.path.join(mysave_dir, "gogo"))

    # Visualize the prediction
    import matplotlib.gridspec as gridspec

    # 1080p = 1,920 x 1,080
    fig = plt.figure(figsize=(19.2, 10.8))

    grid = gridspec.GridSpec(5, 9)  # 5 rows, 9 columns
    grid.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
    plt.axis('off')

    # The 2d-input and 3d-ground-truth panels are not drawn; the prediction
    # goes into the third grid cell.
    pose_axis = plt.subplot(grid[2], projection='3d')
    viz.show3Dpose(poses3d, pose_axis, lcolor="#9b59b6", rcolor="#2ecc71")

    plt.show()
def main(_):
    """Continuously lift the newest OpenPose JSON file to 3d and display it.

    Polls ``openpose_output_dir`` for the most recently created file, reads
    the first person's keypoints, runs the model, renders the pose to
    ``png/test_<frame>.png`` and shows that image in an OpenCV window.
    Press ``q`` in the window to quit. Errors while processing a file are
    printed and the loop continues with the next poll.
    """
    # names of the files processed so far (a set, so it stays bounded)
    done = set()

    enc_in = np.zeros((1, 64))

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Normalization statistics restricted to the dimensions the model uses.
    mu = data_mean_2d[dim_to_use_2d]
    stddev = data_std_2d[dim_to_use_2d]

    device_count = {"GPU": 0}
    # Last plotted pose; None until one frame has been processed.
    before_pose = None
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        while True:
            # Check the quit key first so the loop can always be left, even
            # while the directory is empty or every frame fails.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break

            list_of_files = glob.iglob("{0}/*".format(openpose_output_dir))
            try:
                latest_file = max(list_of_files, key=os.path.getctime)
            except (ValueError, OSError):
                # empty directory, or a file vanished between glob and stat
                continue
            if not latest_file:
                continue

            try:
                _file = file_name = latest_file
                print(latest_file)
                if not os.path.isfile(_file):
                    raise Exception("No file found!!, {0}".format(_file))
                with open(_file) as json_file:
                    data = json.load(json_file)

                # take first person
                _data = data["people"][0]["pose_keypoints"]
                xy = []
                # ignore confidence score (every third value)
                for o in range(0, len(_data), 3):
                    xy.append(_data[o])
                    xy.append(_data[o + 1])

                frame_indx = re.findall(r"(\d+)", file_name)
                frame = int(frame_indx[0])

                # the first 36 values are the (x, y) pairs fed to the model
                _data = np.zeros(36)
                for o in range(36):
                    _data[o] = xy[o]

                # mapping all body parts for 3d-pose-baseline format
                for i, joint in enumerate(order):
                    enc_in[0, joint * 2:joint * 2 + 2] = _data[i * 2:i * 2 + 2]
                for j in range(2):
                    # Hip
                    enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2
                    # Neck/Nose
                    enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] + enc_in[0][12 * 2 + j]) / 2
                    # Thorax
                    enc_in[0][13 * 2 + j] = 2 * enc_in[0][12 * 2 + j] - enc_in[0][14 * 2 + j]

                # set spine
                spine_x = enc_in[0][24]
                spine_y = enc_in[0][25]

                # Only the normalized slice goes to the network; enc_in keeps
                # its 64 columns, so a failure below cannot leave it narrowed
                # for the next frame.
                net_in = np.divide(enc_in[:, dim_to_use_2d] - mu, stddev)

                dp = 1.0
                dec_out = np.zeros((1, 48))
                _, _, poses3d = model.step(sess, net_in, dec_out, dp, isTraining=False)

                enc_in = data_utils.unNormalizeData(
                    net_in, data_mean_2d, data_std_2d, dim_to_ignore_2d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                gs1 = gridspec.GridSpec(1, 1)
                gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
                plt.axis('off')
                enc_in = np.vstack(enc_in)
                poses3d = np.vstack([poses3d])

                # View each row as 32 joints x (x, y, z); vstack returned a
                # fresh contiguous array, so edits land in poses3d.
                joints = poses3d.reshape(-1, 32, 3)
                # swap the y and z columns
                joints[:, :, [1, 2]] = joints[:, :, [2, 1]]
                # same starting bounds as the original running max/min
                z_max = max(0, joints[:, :, 2].max())
                z_min = min(10000, joints[:, :, 2].min())
                # mirror z inside [z_min, z_max], then shift by the 2d spine
                joints[:, :, 2] = z_max - joints[:, :, 2] + z_min + (500 - spine_y)
                joints[:, :, 0] += (spine_x - 630)

                # Plot 3d predictions
                ax = plt.subplot(gs1[0], projection='3d')
                ax.view_init(18, -70)
                logger.debug(np.min(poses3d))
                # Fall back to the previous pose when the prediction is an
                # outlier and a previous pose exists.
                if np.min(poses3d) < -1000 and before_pose is not None:
                    poses3d = before_pose

                p3d = poses3d
                viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71")
                before_pose = poses3d
                pngName = 'png/test_{0}.png'.format(str(frame))
                plt.savefig(pngName)

                img = cv2.imread(pngName, 0)
                rect_cpy = img.copy()
                cv2.imshow('3d-pose-baseline', rect_cpy)
                done.add(file_name)

            except Exception as e:
                # best effort: report the problem and keep polling
                print(e)
def main(_):
    """Lift smoothed OpenPose keypoints to 3d and export them as JSON.

    Steps:
      1. Read the smoothed 2d keypoints and plot their curves.
      2. If ``FLAGS.interpolation`` is set, resample every keypoint curve
         with a smoothing cubic spline at a step of ``FLAGS.multiplier``.
      3. For each frame, run the model, post-process the pose, save
         ``png/pose_frame_<frame>.png`` and record the 2d/3d joint positions.
      4. Optionally write ``gif_output/animation.gif`` and always write
         ``maya/3d_data.json`` and ``maya/2d_data.json``.
    """
    smoothed = read_openpose_json()
    plt.figure(2)
    smooth_curves_plot = show_anim_curves(smoothed, plt)
    pngName = 'gif_output/smooth_plot.png'
    smooth_curves_plot.savefig(pngName)
    logger.info('writing gif_output/smooth_plot.png')

    if FLAGS.interpolation:
        logger.info("start interpolation")

        framerange = len(smoothed)
        joint_rows = 36
        array = np.concatenate(list(smoothed.values()))
        array_reshaped = np.reshape(array, (framerange, joint_rows))

        multiplier = FLAGS.multiplier
        # float division even if the flag is an int
        multiplier_inv = 1.0 / multiplier

        frames_in = np.arange(framerange)
        frame_resampled = np.arange(0, framerange, multiplier)
        smooth_resamp = 125

        resampled_rows = []
        for row in range(joint_rows):
            x = array_reshaped[:, row]
            spl = UnivariateSpline(frames_in, x, k=3)
            # relative smooth factor based on jnt anim curve
            smooth_fac = (x.max() - x.min()) * smooth_resamp
            spl.set_smoothing_factor(float(smooth_fac))
            resampled_rows.append(spl(frame_resampled))

        logger.info("done interpolating. reshaping {0} frames,  please wait!!".format(framerange))

        # One column per keypoint value, one row per resampled frame. The
        # frame count comes from the resampled curves themselves, so rows
        # stay aligned even when framerange / multiplier is not an integer.
        out_array = np.column_stack(resampled_rows)

        interpolate_smoothed = {}
        for frame in range(out_array.shape[0]):
            interpolate_smoothed[frame] = list(out_array[frame])

        plt.figure(3)
        smoothed = interpolate_smoothed
        interpolate_curves_plot = show_anim_curves(smoothed, plt)
        pngName = 'gif_output/interpolate_{0}.png'.format(smooth_resamp)
        interpolate_curves_plot.savefig(pngName)
        logger.info('writing {0}'.format(pngName))

    enc_in = np.zeros((1, 64))

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Normalization statistics restricted to the dimensions the model uses.
    mu = data_mean_2d[dim_to_use_2d]
    stddev = data_std_2d[dim_to_use_2d]

    device_count = {"GPU": 1}
    png_lib = []
    before_pose = None
    with tf.Session(config=tf.ConfigProto(
            device_count=device_count,
            allow_soft_placement=True)) as sess:
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        iter_range = len(smoothed)
        export_units = {}
        twod_export_units = {}
        for frame, xy in smoothed.items():
            logger.info("calc frame {0}/{1}".format(frame, iter_range))

            # the first 36 values are the (x, y) pairs fed to the model
            _data = np.zeros(36)
            for o in range(36):
                _data[o] = xy[o]

            # record the 2d keypoints of this frame for the export
            twod_export_units[frame] = {}
            for abs_b, __n in enumerate(range(0, len(xy), 2)):
                twod_export_units[frame][abs_b] = {"translate": [xy[__n], xy[__n + 1]]}

            # mapping all body parts for 3d-pose-baseline format
            for i, joint in enumerate(order):
                enc_in[0, joint * 2:joint * 2 + 2] = _data[i * 2:i * 2 + 2]
            for j in range(2):
                # Hip
                enc_in[0][0 * 2 + j] = (enc_in[0][1 * 2 + j] + enc_in[0][6 * 2 + j]) / 2
                # Neck/Nose
                enc_in[0][14 * 2 + j] = (enc_in[0][15 * 2 + j] + enc_in[0][12 * 2 + j]) / 2
                # Thorax
                enc_in[0][13 * 2 + j] = 2 * enc_in[0][12 * 2 + j] - enc_in[0][14 * 2 + j]

            # set spine
            spine_x = enc_in[0][24]
            spine_y = enc_in[0][25]

            # Only the normalized slice goes to the network; the 64-wide
            # buffer is restored from it after the step.
            net_in = np.divide(enc_in[:, dim_to_use_2d] - mu, stddev)

            dp = 1.0
            dec_out = np.zeros((1, 48))
            _, _, poses3d = model.step(sess, net_in, dec_out, dp, isTraining=False)

            enc_in = data_utils.unNormalizeData(net_in, data_mean_2d,
                                                data_std_2d, dim_to_ignore_2d)
            poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d,
                                                 data_std_3d, dim_to_ignore_3d)
            gs1 = gridspec.GridSpec(1, 1)
            gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
            plt.axis('off')
            enc_in = np.vstack(enc_in)
            poses3d = np.vstack([poses3d])

            # View each row as 32 joints x (x, y, z); vstack returned a fresh
            # contiguous array, so edits land in poses3d.
            joints = poses3d.reshape(-1, 32, 3)
            # swap the y and z columns
            joints[:, :, [1, 2]] = joints[:, :, [2, 1]]
            # same starting bounds as the original running max/min
            z_max = max(0, joints[:, :, 2].max())
            z_min = min(10000, joints[:, :, 2].min())
            # mirror z inside [z_min, z_max], then shift by the 2d spine
            joints[:, :, 2] = z_max - joints[:, :, 2] + z_min + (500 - spine_y)
            joints[:, :, 0] += (spine_x - 630)

            # Plot 3d predictions
            ax = plt.subplot(gs1[0], projection='3d')
            ax.view_init(18, -70)

            # Reuse the previous pose for outlier predictions, but only once
            # a previous pose exists (otherwise poses3d would become None).
            if (FLAGS.cache_on_fail and before_pose is not None
                    and np.min(poses3d) < -1000):
                poses3d = before_pose

            p3d = poses3d
            to_export = poses3d.tolist()[0]
            # group the flat coordinate list into one [x, y, z] per joint
            export_units[frame] = {}
            for jnt_index, o in enumerate(range(0, len(to_export), 3)):
                export_units[frame][jnt_index] = {
                    "translate": [to_export[o], to_export[o + 1], to_export[o + 2]]}

            viz.show3Dpose(p3d, ax, lcolor="#9b59b6", rcolor="#2ecc71")

            pngName = 'png/pose_frame_{0}.png'.format(str(frame).zfill(12))
            plt.savefig(pngName)
            if FLAGS.write_gif:
                png_lib.append(imageio.imread(pngName))

            if FLAGS.cache_on_fail:
                before_pose = poses3d

    if FLAGS.write_gif:
        if FLAGS.interpolation:
            # take every frame on gif_fps * multiplier_inv; the step is
            # clamped to 1 because range() rejects a zero step
            gif_step = max(1, int(multiplier_inv))
            png_lib = np.array([png_lib[png_image]
                                for png_image in range(0, len(png_lib), gif_step)])
        logger.info("creating Gif gif_output/animation.gif, please Wait!")
        imageio.mimsave('gif_output/animation.gif', png_lib, fps=FLAGS.gif_fps)

    _out_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'maya/3d_data.json')
    twod_out_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'maya/2d_data.json')
    with open(_out_file, 'w') as outfile:
        json.dump(export_units, outfile)
    logger.info("exported maya json to {0}".format(_out_file))
    with open(twod_out_file, 'w') as outfile:
        json.dump(twod_export_units, outfile)
    logger.info("exported maya json to {0}".format(twod_out_file))

    logger.info("Done!")
def create_movie():
    """Render a side-by-side 2d / 3d pose movie as numbered JPEG frames.

    The function reads the normalization statistics through ``data_utils``,
    builds the model with ``create_model``, runs it over sliding windows of
    ``FLAGS.seqlen`` frames taken from the ``enc_in`` dataset stored in
    ``FLAGS.data_2d_path``, rotates the predictions with the transpose of the
    rotation matrix of camera ``(FLAGS.sub_id, FLAGS.cam_id)`` and finally
    writes one ``NNNNN.jpg`` image per frame under ``FLAGS.output_dir``.

    Raises:
        ValueError: if the 2d input holds fewer than ``FLAGS.seqlen`` frames,
            so that no complete window can be formed.
    """
    actions = define_actions("All")
    rcams, vcams = cameras.load_cameras('cameras.h5', [1, 5, 6, 7, 8, 9, 11])
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}

    # Create the output directory directly instead of spawning
    # `mkdir -p` through a shell: no quoting problems with unusual paths and
    # no dependency on a POSIX shell being available.
    if FLAGS.output_dir:
        try:
            os.makedirs(FLAGS.output_dir)
        except OSError:
            # An already existing directory is fine; anything else is not.
            if not os.path.isdir(FLAGS.output_dir):
                raise

    # Only the normalization statistics are needed here; the data sets
    # themselves are discarded.
    (_, _, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d,
     _, _, _, _) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, vcams)

    if FLAGS.use_sh:
        (_, _, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (_, _, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams, vcams)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          device_count=device_count)) as sess:
        # === Create the model ===
        isTraining = False
        model = create_model(sess, isTraining, dim_to_use_3d, 1, data_mean_3d,
                             data_std_3d, dim_to_ignore_3d)
        print("Model created")

        with h5py.File(FLAGS.data_2d_path, 'r') as h5f:
            enc_in = h5f['enc_in'][:]

        # Keep the used dimensions only and normalize them with the
        # statistics returned by data_utils.
        enc_in = enc_in[:, dim_to_use_2d]
        mu = data_mean_2d[dim_to_use_2d]
        stddev = data_std_2d[dim_to_use_2d]
        enc_in = np.divide((enc_in - mu), stddev)

        # Drop trailing frames so the frame count is a multiple of seqlen.
        n2d = enc_in.shape[0]
        n_extra = n2d % FLAGS.seqlen
        if n_extra > 0:
            enc_in = enc_in[:-n_extra, :]
        n2d = enc_in.shape[0]

        n_windows = n2d - FLAGS.seqlen + 1
        if n_windows < 1:
            raise ValueError(
                "need at least FLAGS.seqlen ({0}) frames of 2d input, "
                "got {1}".format(FLAGS.seqlen, n2d))

        # One window per start frame, each later fed to the model on its own.
        windows = np.stack(
            [enc_in[i:i + FLAGS.seqlen, :] for i in range(n_windows)])
        encoder_inputs = np.array_split(windows, n_windows)

        all_poses_3d = []
        enc_inputs = []

        ### MAKE PREDICTIONS ######
        dp = 1.0  # passed to model.step as the dropout argument
        for bidx, window in enumerate(encoder_inputs):
            dec_out = np.zeros(shape=(1, FLAGS.seqlen, 48))
            _, _, poses3d = model.step(sess, window, dec_out, dp,
                                       isTraining=False)

            inp = np.reshape(window, [-1, 16 * 2])
            poses3d = np.reshape(poses3d, [-1, 16 * 3])
            # The first window contributes all of its frames; every later
            # window only adds its last frame, so each input frame ends up
            # in the output exactly once.
            if bidx != 0:
                inp = np.expand_dims(inp[FLAGS.seqlen - 1, :], axis=0)
                poses3d = np.expand_dims(poses3d[FLAGS.seqlen - 1, :], axis=0)

            inp = data_utils.unNormalizeData(inp, data_mean_2d, data_std_2d,
                                             dim_to_ignore_2d)
            poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d,
                                                 data_std_3d,
                                                 dim_to_ignore_3d)
            enc_inputs.append(inp)
            all_poses_3d.append(poses3d)

        enc_in = np.vstack(enc_inputs)
        poses3d = np.vstack(all_poses_3d)

        ## Choose camera_id for reconstruction into world coordinate
        ### NOTE: FOR ARBITRARY 2D detections selecting any camera of
        ### subject 9 and 11 works
        the_cam = rcams[(FLAGS.sub_id, FLAGS.cam_id)]
        R, _, _, _, _, _, name = the_cam
        print(name)

        # Apply the inverse rotation only (the translation is not used), then
        # subtract the first joint from every joint of the same frame.
        poses3d = np.reshape(poses3d, [-1, 3])
        X_cam = R.T.dot(poses3d.T)
        poses3d = np.reshape(X_cam.T, [-1, 32 * 3])
        poses3d = poses3d - np.tile(poses3d[:, :3], [1, 32])

        ##### GENERATE THE MOVIE
        fig = plt.figure(figsize=(12.8, 7.2))
        ax1 = fig.add_subplot(1, 2, 1)
        ax2 = fig.add_subplot(1, 2, 1 + 1, projection='3d')

        n2d = enc_in.shape[0]
        ob1 = viz.Ax2DPose(ax1)
        ob2 = viz.Ax3DPose(ax2, lcolor="#9b59b6", rcolor="#2ecc71")
        fnames = sorted(glob.glob(FLAGS.img_dir + "*.jpg"))

        for i in range(n2d):
            print("Working on figure {0:04d} / {1:05d}... \n".format(
                i + 1, n2d), end='')

            # Left panel: source image with the 2d pose.
            p2d = enc_in[i, :]
            im = Image.open(fnames[i])
            ob1.update(im, p2d)

            # Right panel: 3d prediction.
            p3d = poses3d[i, :]
            ob2.update(p3d)

            fig.canvas.draw()
            ncols, nrows = fig.canvas.get_width_height()
            # np.frombuffer replaces the deprecated binary-mode
            # np.fromstring (which the original also applied twice).
            nparr = np.frombuffer(fig.canvas.tostring_rgb(),
                                  dtype=np.uint8).reshape(nrows, ncols, 3)

            out_name = FLAGS.output_dir + '{0:05d}.jpg'.format(i + 1)
            print(out_name)
            # Reverse the channel axis (RGB -> BGR) before handing to cv2.
            cv2.imwrite(out_name, nparr[:, :, ::-1])
def main(_):
    """Predict 3d poses for the manipulation videos and plot them.

    Steps, in order:
      1. Load the cameras and the 3d / 2d data through ``data_utils`` to get
         the normalization statistics.
      2. Read the ``poses`` dataset of every ``<seq>/<seq>.h5`` file found in
         the ``manipulation_video`` directory (located two directory levels
         above ``FLAGS.data_dir``), reorder the joints with ``SH_TO_GT_PERM``
         and normalize the result.
      3. Run the bi-frame model row by row and un-normalize its output.
      4. For the first three annotated images of each sequence, print the
         mean error after a Procrustes alignment against the XML annotation
         and show the image next to the predicted 3d pose.

    Args:
        _: unused.
    """
    actions_all = data_utils.define_actions("All")
    actions = data_utils.define_actions("Discussion")

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)
    train_set_3d = data_utils.remove_first_frame(train_set_3d)
    test_set_3d = data_utils.remove_first_frame(test_set_3d)
    train_root_positions = data_utils.remove_first_frame(train_root_positions)
    test_root_positions = data_utils.remove_first_frame(test_root_positions)
    print("Finished Read 3D Data")

    # Alternative 2d source (stacked-hourglass predictions), kept disabled:
    # train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(actions_all, FLAGS.data_dir)
    # train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.transform_to_2d_biframe_prediction(train_set_2d,
    #                                                                                                                                       test_set_2d,
    #                                                                                                                                       data_mean_2d,
    #                                                                                                                                       data_std_2d,
    #                                                                                                                                       dim_to_ignore_2d,
    #                                                                                                                                       dim_to_use_2d)
    train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
        actions_all, FLAGS.data_dir, rcams)
    train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.transform_to_2d_biframe_prediction(
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
        dim_to_ignore_2d, dim_to_use_2d)

    # Index of every named H36M joint inside SH_NAMES; the assert pins the
    # expected joint order so a change in either name list is caught early.
    SH_TO_GT_PERM = np.array(
        [SH_NAMES.index(h) for h in H36M_NAMES if h != '' and h in SH_NAMES])
    assert np.all(SH_TO_GT_PERM == np.array(
        [6, 2, 1, 0, 3, 4, 5, 7, 8, 9, 13, 14, 15, 12, 11, 10]))

    test_set = {}

    # The manipulation videos live two directory levels above FLAGS.data_dir.
    manipulation_dir = os.path.dirname(FLAGS.data_dir)
    manipulation_dir = os.path.dirname(manipulation_dir)
    manipulation_dir += '/manipulation_video/'
    manipulation_folders = glob.glob(manipulation_dir + '*')

    # Every sequence is stored under the same fixed subject / action key.
    subj = 1
    action = 'manipulation-video'
    for folder in manipulation_folders:
        seqname = os.path.basename(folder)
        with h5py.File(folder + '/' + seqname + '.h5', 'r') as h5f:
            poses = h5f['poses'][:]

            # Permute the loaded data to make it compatible with H36M
            poses = poses[:, SH_TO_GT_PERM, :]

            # Reshape into n x (32*2) matrix
            poses = np.reshape(poses, [poses.shape[0], -1])
            poses_final = np.zeros([poses.shape[0], len(H36M_NAMES) * 2])

            # Interleaved x / y column indices of the H36M joints that have a
            # name other than 'Neck/Nose'.
            dim_to_use_x = np.where(
                np.array([x != '' and x != 'Neck/Nose'
                          for x in H36M_NAMES]))[0] * 2
            dim_to_use_y = dim_to_use_x + 1

            dim_to_use = np.zeros(len(SH_NAMES) * 2, dtype=np.int32)
            dim_to_use[0::2] = dim_to_use_x
            dim_to_use[1::2] = dim_to_use_y
            poses_final[:, dim_to_use] = poses
            print(seqname, poses_final.shape)
            # Every exact zero (including the unused columns) becomes 0.1.
            poses_final[poses_final == 0.] = 0.1
            test_set[(subj, action, seqname)] = poses_final

    test_set = data_utils.uni_frame_to_bi_frame(test_set)
    test_set_2d = data_utils.normalize_data(test_set, data_mean_2d,
                                            data_std_2d, dim_to_use_2d)
    # Keep every second row of the un-normalized set.
    for key in test_set.keys():
        test_set[key] = test_set[key][0::2, :]

    # Columns of the 96-wide 3d pose vector that are compared against the
    # XML annotations (36 values, i.e. 12 joints x 3 coordinates).
    dim_to_use_12_manipulation_joints = np.array([
        3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 51,
        52, 53, 54, 55, 56, 57, 58, 59, 75, 76, 77, 78, 79, 80, 81, 82, 83
    ])
    print("Finished Normalize Manipualtion Videos")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = FLAGS.batch_size  #Intial code is 64*2
        model = predict_3dpose_biframe.create_model(sess, actions_all,
                                                    batch_size)
        print("Model loaded")

        j = 0  # figure number, one figure per sequence
        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d
            # if fname != specific_seqname + '.h5':
            #     continue
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape

            # Split into about-same-size batches; `n2d // 1` sections means
            # one row per batch.
            enc_in = np.array_split(enc_in, n2d // 1)
            all_poses_3d = []
            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                # Zero-filled stand-in for the decoder-output argument of
                # model.step.
                anything = np.zeros((enc_in[bidx].shape[0], 48))
                _, _, poses3d = model.step(sess,
                                           enc_in[bidx],
                                           anything,
                                           dp,
                                           isTraining=False)

                # Denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d, dim_to_ignore_2d)
                poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d,
                                                     data_std_3d,
                                                     dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, poses3d = map(np.vstack, [enc_in, all_poses_3d])
            # NOTE(review): this second vstack runs on the already stacked
            # arrays and looks redundant - confirm before removing.
            enc_in, poses3d = map(np.vstack, [enc_in, poses3d])

            poses3d_12_manipulation = poses3d[:,
                                              dim_to_use_12_manipulation_joints]

            annotated_images = glob.glob(manipulation_dir + fname +
                                         '/info/*.xml')
            annotated_images = sorted(annotated_images)

            # 1080p = 1,920 x 1,080
            fig = plt.figure(j, figsize=(10, 10))
            gs1 = gridspec.GridSpec(3, 3)
            gs1.update(wspace=-0, hspace=0.1)  # set the spacing between axes.
            plt.axis('off')

            subplot_idx = 1
            nsamples = 3
            # One grid row per sample: image | image | 3d prediction.
            # NOTE(review): indexes annotated_images[0..2] unconditionally, so
            # a sequence with fewer than three annotations raises IndexError.
            for i in np.arange(nsamples):
                # Plot 2d Detection
                ax1 = plt.subplot(gs1[subplot_idx - 1])
                img = mpimg.imread(
                    manipulation_dir + fname + '/skeleton_cropped/' +
                    os.path.basename(annotated_images[i]).split('_')[0] +
                    '.jpg')
                ax1.imshow(img)

                # Plot 2d pose
                ax2 = plt.subplot(gs1[subplot_idx])
                # p2d = enc_in[i,:]
                # viz.show2Dpose( p2d, ax2 )
                # ax2.invert_yaxis()
                ax2.imshow(img)

                # Plot 3d predictions
                # Compute first the procrustion and print error
                gt = getJ3dPosFromXML(annotated_images[i])
                A = poses3d_12_manipulation[i, :].reshape(gt.shape)
                _, Z, T, b, c = procrustes.compute_similarity_transform(
                    gt, A, compute_optimal_scale=True)
                # Per-row Euclidean distance between the annotation and the
                # aligned prediction.
                sqerr = np.sqrt(np.sum((gt - (b * A.dot(T)) - c)**2, axis=1))
                print("{0} - {1} - Mean Error (mm) : {2}".format(
                    fname, os.path.basename(annotated_images[i]),
                    np.mean(sqerr)))

                ax3 = plt.subplot(gs1[subplot_idx + 1], projection='3d')
                # `temp` (the aligned pose) is computed but not plotted; the
                # un-aligned prediction is what gets drawn below.
                temp = poses3d[i, :].reshape((32, 3))
                temp = c + temp.dot(T)  #Do not scale
                # p3d = temp.reshape((1, 96))
                p3d = poses3d[i, :]
                viz.show3Dpose(p3d, ax3, lcolor="#9b59b6", rcolor="#2ecc71")
                ax3.invert_zaxis()
                ax3.invert_yaxis()

                subplot_idx = subplot_idx + 3

            plt.show()
            j += 1
def predict(convert_to_world):
    """Run the model on the test set and return inputs, targets and output.

    Args:
        convert_to_world: flag indicating whether to convert the data back to
            world coordinates from the camera frame. The conversion is only
            applied when ``FLAGS.camera_frame`` is set as well.

    Returns:
        A tuple ``(enc_in, dec_out, poses3d)`` of un-normalized arrays: the 2d
        inputs, the 3d ground truth, and the 3d ground truth with every
        column at index 1, 4, 7, ... replaced by the model prediction.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            key3d = (subj, b, fname[:-3]) if (
                fname.endswith('-sh')) and FLAGS.camera_frame else key3d

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # np.array_split rejects a section count of 0, which is what
            # `n // batch_size` yields for a sequence shorter than one batch.
            # Clamp to a single batch so short sequences are still processed.
            n_batches_2d = max(1, n2d // batch_size)
            n_batches_3d = max(1, n3d // batch_size)

            # Generate the loss pairs
            loss_pairs = None
            if model.num_loss_pairs:
                num_pts = int(model.HUMAN_3D_SIZE / 3)
                pairs = np.asarray([(i, j) for i in range(num_pts)
                                    for j in range(num_pts) if i < j])
                # One independent random draw of pairs per frame.
                pair_idxs = [
                    np.random.choice(len(pairs),
                                     model.num_loss_pairs,
                                     replace=False) for _ in range(n3d)
                ]
                loss_pairs = np.take(pairs, pair_idxs, axis=0)
                loss_pairs = np.array_split(loss_pairs, n_batches_2d)

            # Split into about-same-size batches
            enc_in = np.array_split(enc_in, n_batches_2d)
            dec_out = np.array_split(dec_out, n_batches_3d)
            all_poses_3d = []

            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                if model.num_loss_pairs:
                    _, _, poses3d = model.step(sess,
                                               enc_in[bidx],
                                               dec_out[bidx],
                                               dp,
                                               loss_pairs=loss_pairs[bidx],
                                               isTraining=False)
                else:
                    _, _, poses3d = model.step(sess,
                                               enc_in[bidx],
                                               dec_out[bidx],
                                               dp,
                                               isTraining=False)

                # denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d, dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d,
                    dim_to_ignore_3d)
                poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d,
                                                     data_std_3d,
                                                     dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, dec_out, poses3d = map(np.vstack,
                                           [enc_in, dec_out, all_poses_3d])

            if convert_to_world:
                # Convert back to world coordinates
                if FLAGS.camera_frame:
                    N_CAMERAS = 4
                    N_JOINTS_H36M = 32

                    # Add global position back (ground truth only)
                    dec_out = dec_out + np.tile(test_root_positions[key3d],
                                                [1, N_JOINTS_H36M])

                    # Load the appropriate camera
                    subj, _, sname = key3d
                    cname = sname.split('.')[1]  # <-- camera name
                    scams = {(subj, c + 1): rcams[(subj, c + 1)]
                             for c in range(N_CAMERAS)
                             }  # cams of this subject
                    scam_idx = [
                        scams[(subj, c + 1)][-1] for c in range(N_CAMERAS)
                    ].index(cname)  # index of camera used
                    the_cam = scams[(subj, scam_idx + 1)]  # the camera used
                    R, T, f, c, k, p, name = the_cam
                    assert name == cname

                    def cam2world_centered(data_3d_camframe):
                        """Map camera-frame poses to the world frame and
                        subtract the first joint from every joint."""
                        data_3d_worldframe = cameras.camera_to_world_frame(
                            data_3d_camframe.reshape((-1, 3)), R, T)
                        data_3d_worldframe = data_3d_worldframe.reshape(
                            (-1, N_JOINTS_H36M * 3))
                        # subtract root translation
                        return data_3d_worldframe - np.tile(
                            data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                    # Apply inverse rotation and translation
                    dec_out = cam2world_centered(dec_out)
                    poses3d = cam2world_centered(poses3d)

            # Start from the ground truth and overwrite every column at
            # index 1, 4, 7, ... with the predicted value.
            # NOTE(review): the nesting of this step was reconstructed; it is
            # applied whether or not the world conversion ran - confirm.
            poses3dnew = dec_out.copy()
            poses3dnew[:, 1::3] = poses3d[:, 1::3]
            poses3d = poses3dnew

    # NOTE(review): the arrays are rebuilt for every key, so what is returned
    # here belongs to the last key iterated - confirm that this is intended.
    return enc_in, dec_out, poses3d
def train():
    """Train a Sequence to sequence model on human motion.

    Runs ``FLAGS.epochs`` epochs. Each epoch performs one pass over all
    training batches, prints the averaged training loss, evaluates every
    action of the test set with ``evaluate_batches``, logs the average error
    through ``model.test_writer`` and saves a checkpoint under ``train_dir``.
    """
    actions = data_utils.define_actions(FLAGS.action)

    rcams, vcams = cameras.load_cameras('cameras.h5', [1, 5, 6, 7, 8, 9, 11])
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions, test_root_positions, offsets_train,
     offsets_test) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, vcams)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams, vcams)
    print("done reading and normalizing data.")

    # Limit TF to take a fraction of the GPU memory
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # === Create the model ===
        isTraining = True
        model = create_model(sess, isTraining, dim_to_use_3d,
                             FLAGS.batch_size, data_mean_3d, data_std_3d,
                             dim_to_ignore_3d)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        #=== This is the training loop ===
        # Continue the step counter after the step given by FLAGS.load.
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1
        step_time, loss = 0, 0
        current_epoch = 0

        # `range` instead of the Python-2-only `xrange`: same iteration, and
        # the loop also runs on Python 3.
        for _ in range(FLAGS.epochs):
            current_epoch = current_epoch + 1

            # === Training for an epoch ===
            encoder_inputs, decoder_outputs = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame, training=True)
            nbatches = len(encoder_inputs)
            print("There are {0} train batches".format(nbatches))
            start_time, loss = time.time(), 0.

            for i in range(nbatches):
                report = (i + 1) % 100 == 0  # progress every 100 batches
                if report:
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, i + 1, nbatches), end="")

                enc_in, dec_out = encoder_inputs[i], decoder_outputs[i]
                isTraining = True
                step_loss, loss_summary, lr_summary, outputs = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining)

                if report:
                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    # Wall time of the last 100 batches, shown per batch.
                    step_time = (time.time() - start_time)
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(1000 * step_time / 100))

                loss += step_loss
                current_step += 1

            loss = loss / nbatches
            print("==========================\n"
                  "Global step: %d\n"
                  "Learning rate: %.2e\n"
                  "Train loss avg: %.4f\n"
                  "==========================" %
                  (model.global_step.eval(), model.learning_rate.eval(),
                   loss))

            # === Test for an epoch ===
            isTraining = False
            print("{0:=^12} {1:=^6}".format("Action", "mm"))

            cum_err = 0
            for action in actions:
                tot_act_err = 0
                print("{0:<12} ".format(action), end="")

                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                n_sequences = len(action_test_set_2d.keys())

                for key2d in action_test_set_2d.keys():
                    (subj, b, fname) = key2d

                    # keys should be the same if 3d is in camera coordinates
                    key3d = key2d if FLAGS.camera_frame else (
                        subj, b, '{0}.h5'.format(fname.split('.')[0]))
                    key3d = (subj, b, fname[:-3]) if (
                        fname.endswith('-sh')
                        and FLAGS.camera_frame) else key3d

                    if fname.endswith('-sh'):
                        fname = fname[:-3]

                    # Evaluate one sequence at a time.
                    enc_in = {key2d: test_set_2d[key2d]}
                    dec_out = {key3d: test_set_3d[key3d]}
                    encoder_inputs, decoder_outputs = model.get_all_batches(
                        enc_in, dec_out, FLAGS.camera_frame, training=False)

                    act_err, _, step_time, loss = evaluate_batches(
                        sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                        dim_to_ignore_3d, current_step, encoder_inputs,
                        decoder_outputs)
                    tot_act_err = tot_act_err + act_err

                # Per-action error averaged over its sequences.
                print("{0:>6.2f}".format(tot_act_err / n_sequences))
                cum_err = cum_err + tot_act_err / n_sequences

            print("{0:<12} {1:>6.2f}".format("Average",
                                             cum_err / float(len(actions))))
            print("{0:=^19}".format(''))

            # Log the error to tensorboard
            summaries = sess.run(
                model.err_mm_summary,
                {model.err_mm: float(cum_err / float(len(actions)))})
            model.test_writer.add_summary(summaries, current_step)

            # Save the model
            print("Saving the model... ", end="")
            start_time = time.time()
            model.saver.save(sess,
                             os.path.join(train_dir, 'checkpoint'),
                             global_step=current_step)
            # `.2f` (two decimals) replaces the original `02f`, which asked
            # for a minimum width of 2 and printed six decimals.
            print("done in {0:.2f} seconds".format(time.time() - start_time))

            # Reset global time and loss
            step_time, loss = 0, 0
            sys.stdout.flush()
import data_utils from progress.progress.bar import Bar as Bar import utils as utils import misc as misc import log as log import cameras 1 from pykalman import KalmanFilter from sklearn.metrics import mean_squared_error import matplotlib.pyplot as plt action = 'All' actions = data_utils.define_actions( action ) cameras_path = "./data/h36m/cameras.h5" TRAIN_TEST_ID = [1,5,6,7,8,9,11] rcams = cameras.load_cameras(cameras_path, TRAIN_TEST_ID) # Load data data_dir = './data/h36m/' camera_frame = True predict_14 = False # Load 3d data and load (or create) 2d projections train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data( actions, data_dir, camera_frame, rcams, predict_14 ) # Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth 2D projections #use stacked hourgalss use_sh = False if use_sh: train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(actions, data_dir) else:
import rospy import os os.chdir('/home/fan/3d-pose-baseline-master/') print('reading data, please wait...') FLAGS = tf.app.flags.FLAGS enc_in = np.zeros((1, 64)) enc_in[0] = [0 for i in range(64)] actions = data_utils.define_actions(FLAGS.action) order = [15, 12, 25, 26, 27, 17, 18, 19, 1, 2, 3, 6, 7, 8] SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11] rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS) train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions( actions, FLAGS.data_dir) train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data( actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14) print('######reading data finished!######') device_count = {"GPU": 1} def callback(data): if data.data == 'hello': print(data.data) else: global FLAGS global enc_in