def test():
    """Evaluate a trained model on the test set.

    Loads the camera parameters and the 3d / 2d pose data, builds the model
    through ``create_model`` and, when ``FLAGS.evaluateActionWise`` is set,
    prints the error returned by ``evaluate_batches`` for every action
    (under an "mm" column header) followed by the average over all actions.
    When the flag is not set, no evaluation is performed.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data. Only the test split and the mean / std / dimension
    # bookkeeping are consumed below; the remaining return values are ignored.
    (_, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, _, _) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (_, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (_, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    session_config = tf.ConfigProto(device_count=device_count,
                                    allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        if FLAGS.evaluateActionWise:
            # Table header: "Action" centred in 12 columns, "mm" in 6.
            print("{0:=^12} {1:=^6}".format("Action", "mm"))

            cum_err = 0
            for action in actions:
                # The action name and its error share one output line.
                print("{0:<12} ".format(action), end="")

                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs, _ = \
                    data_utils.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False,
                        n_context=FLAGS.n_context, new_dim=False,
                        batch_size=FLAGS.batch_size)

                # Only the first of the four returned values (the action's
                # error) is needed here.
                act_err, _, _, _ = evaluate_batches(
                    sess, model,
                    data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d,
                    data_mean_2d, data_std_2d, dim_to_use_2d,
                    dim_to_ignore_2d,
                    current_step, encoder_inputs, decoder_outputs,
                    test_set_2d=action_test_set_2d)
                cum_err = cum_err + act_err

                print("{0:>6.2f}".format(act_err))

            print("{0:<12} {1:>6.2f}".format(
                "Average", cum_err / float(len(actions))))
            print("{0:=^19}".format(''))

        sys.stdout.flush()
def train():
    """Train a linear model for 3d pose estimation.

    For each of ``FLAGS.epochs`` epochs the function
      1. runs ``model.step`` over all training batches and prints the average
         training loss,
      2. evaluates on the test data, either per action
         (``FLAGS.evaluateActionWise``) or over the whole test set with a
         per-joint breakdown, and writes the error summary to
         ``model.test_writer``,
      3. saves a checkpoint under ``train_dir``.

    ``train_dir`` is not defined in this function; it is expected to be
    available from the enclosing module.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections. The root positions
    # returned by read_3d_data are not needed for training.
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, _, _) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
             actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
             actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    session_config = tf.ConfigProto(device_count=device_count,
                                    allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        # === This is the training loop ===
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1
        log_every_n_batches = 100

        for current_epoch in range(1, FLAGS.epochs + 1):
            # === Load training batches for one epoch ===
            encoder_inputs, decoder_outputs = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame,
                training=True)
            nbatches = len(encoder_inputs)
            print("There are {0} train batches".format(nbatches))
            start_time, loss = time.time(), 0.

            # === Loop through all the training batches ===
            for i in range(nbatches):
                should_log = (i + 1) % log_every_n_batches == 0

                if should_log:
                    # Print progress every log_every_n_batches batches; the
                    # line is completed by the "done in" message below.
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, i + 1, nbatches), end="")

                enc_in, dec_out = encoder_inputs[i], decoder_outputs[i]
                step_loss, loss_summary, lr_summary, _ = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining=True)

                if should_log:
                    # Log and print progress every log_every_n_batches
                    # batches
                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    step_time = time.time() - start_time
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(
                        1000 * step_time / log_every_n_batches))

                loss += step_loss
                current_step += 1
            # === end looping through training batches ===

            loss = loss / nbatches
            print("=============================\n"
                  "Global step:         %d\n"
                  "Learning rate:       %.2e\n"
                  "Train loss avg:      %.4f\n"
                  "=============================" %
                  (model.global_step.eval(), model.learning_rate.eval(),
                   loss))
            # === End training for an epoch ===

            # === Testing after this epoch ===
            if FLAGS.evaluateActionWise:
                # Table header: "Action" centred in 12 columns, "mm" in 6.
                print("{0:=^12} {1:=^6}".format("Action", "mm"))

                cum_err = 0
                for action in actions:
                    print("{0:<12} ".format(action), end="")

                    # Get 2d and 3d testing data for this action
                    action_test_set_2d = get_action_subset(
                        test_set_2d, action)
                    action_test_set_3d = get_action_subset(
                        test_set_3d, action)
                    encoder_inputs, decoder_outputs = model.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False)

                    act_err, _, _, _ = evaluate_batches(
                        sess, model,
                        data_mean_3d, data_std_3d, dim_to_use_3d,
                        dim_to_ignore_3d,
                        data_mean_2d, data_std_2d, dim_to_use_2d,
                        dim_to_ignore_2d,
                        current_step, encoder_inputs, decoder_outputs)
                    cum_err = cum_err + act_err

                    print("{0:>6.2f}".format(act_err))

                mean_err = cum_err / float(len(actions))
                summaries = sess.run(model.err_mm_summary,
                                     {model.err_mm: float(mean_err)})
                model.test_writer.add_summary(summaries, current_step)
                print("{0:<12} {1:>6.2f}".format("Average", mean_err))
                print("{0:=^19}".format(''))
            else:
                n_joints = 14 if FLAGS.predict_14 else 17
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    test_set_2d, test_set_3d, FLAGS.camera_frame,
                    training=False)

                total_err, joint_err, step_time, loss = evaluate_batches(
                    sess, model,
                    data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d,
                    data_mean_2d, data_std_2d, dim_to_use_2d,
                    dim_to_ignore_2d,
                    current_step, encoder_inputs, decoder_outputs,
                    current_epoch)

                print("=============================\n"
                      "Step-time (ms):      %.4f\n"
                      "Val loss avg:        %.4f\n"
                      "Val error avg (mm):  %.2f\n"
                      "=============================" %
                      (1000 * step_time, loss, total_err))

                for i in range(n_joints):
                    # Right-aligned in 5 columns with 2 decimal places
                    print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                        i + 1, joint_err[i]))
                print("=============================")

                # Log the error to tensorboard
                summaries = sess.run(model.err_mm_summary,
                                     {model.err_mm: total_err})
                model.test_writer.add_summary(summaries, current_step)

            # Save the model
            print("Saving the model... ", end="")
            start_time = time.time()
            model.saver.save(sess, os.path.join(train_dir, 'checkpoint'),
                             global_step=current_step)
            print("done in {0:.2f} ms".format(
                1000 * (time.time() - start_time)))

            sys.stdout.flush()
def hankgogo(gogodata, gogodatafake):
    """Run the model on ``gogodata`` and plot the resulting 3d pose.

    The model is built with ``create_model_my`` (batch size 1), evaluated once
    on ``gogodata``, the output is un-normalized with the 3d mean / std
    returned by ``data_utils.read_3d_data``, the session is saved under
    ``mysave_dir`` (a name expected from the enclosing module) and the pose is
    shown in a matplotlib window.

    Args:
      gogodata: input handed unchanged to ``model.step``.
        NOTE(review): presumably a normalized 2d pose batch - confirm against
        ``create_model_my``.
      gogodatafake: unused; kept so existing callers keep working.
    """
    # Local import kept from the original: the rest of the module may not
    # import gridspec at the top level.
    import matplotlib.gridspec as gridspec

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data; only the mean / std / ignored dimensions are used below.
    (_, _, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     _, _, _) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # NOTE(review): none of the 2d data is read afterwards. The call is kept
    # only because it cannot be verified from here that create_2d_data is
    # free of side effects - consider dropping it.
    _, _, _, _, _, _ = data_utils.create_2d_data(
        actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(
            config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = 1
        model = create_model_my(sess, actions, batch_size)
        print("Model loaded")

        poses3d = model.step(sess, gogodata, isTraining=False)
        poses3d = data_utils.unNormalizeData(
            poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
        model.saver.save(sess, os.path.join(mysave_dir, "gogo"))

    # 1080p = 1,920 x 1,080
    plt.figure(figsize=(19.2, 10.8))
    gs1 = gridspec.GridSpec(5, 9)  # 5 rows, 9 columns
    gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
    plt.axis('off')

    # Cells 0 and 1 of the grid are left empty (they were reserved for the 2d
    # input and the 3d ground truth); the prediction goes into cell 2.
    prediction_cell = 2
    ax3 = plt.subplot(gs1[prediction_cell], projection='3d')
    viz.show3Dpose(poses3d, ax3, lcolor="#9b59b6", rcolor="#2ecc71")

    plt.show()
def main(_):
    """Predict 3d poses for the manipulation videos and compare to annotations.

    Steps performed:
      1. Load the 3d data for the "Discussion" action and the 2d data for all
         actions to obtain the mean / std arrays used for (un)normalization.
      2. Read the 2d detections stored as ``<seq>/<seq>.h5`` below
         ``<data_dir>/../../manipulation_video/``, reorder their joints with
         ``SH_TO_GT_PERM`` and normalize them.
      3. Run the bi-frame model one row at a time on every sequence.
      4. For up to three annotated images per sequence, align the 12 selected
         joints of the prediction to the annotation with
         ``procrustes.compute_similarity_transform``, print the mean per-joint
         distance and plot image / image / predicted 3d pose.

    Args:
      _: unused (argument supplied by the application runner).
    """
    actions_all = data_utils.define_actions("All")
    actions = data_utils.define_actions("Discussion")

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, train_root_positions,
     test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    # NOTE(review): the four results below are not read again in this
    # function; the calls are kept because it cannot be verified from here
    # that remove_first_frame has no side effects.
    train_set_3d = data_utils.remove_first_frame(train_set_3d)
    test_set_3d = data_utils.remove_first_frame(test_set_3d)
    train_root_positions = data_utils.remove_first_frame(train_root_positions)
    test_root_positions = data_utils.remove_first_frame(test_root_positions)
    print("Finished Read 3D Data")

    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.create_2d_data(
         actions_all, FLAGS.data_dir, rcams)
    (train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_utils.transform_to_2d_biframe_prediction(
         train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d)

    # Joint order that maps the detector output (SH_NAMES) onto H36M_NAMES.
    SH_TO_GT_PERM = np.array(
        [SH_NAMES.index(h) for h in H36M_NAMES if h != '' and h in SH_NAMES])
    assert np.all(SH_TO_GT_PERM == np.array(
        [6, 2, 1, 0, 3, 4, 5, 7, 8, 9, 13, 14, 15, 12, 11, 10]))

    # Column indices (x, y interleaved) of the named H36M joints, excluding
    # 'Neck/Nose'. They do not depend on the sequence, so they are computed
    # once instead of once per folder.
    dim_to_use_x = np.where(
        np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0] * 2
    dim_to_use_y = dim_to_use_x + 1
    dim_to_use = np.zeros(len(SH_NAMES) * 2, dtype=np.int32)
    dim_to_use[0::2] = dim_to_use_x
    dim_to_use[1::2] = dim_to_use_y

    manipulation_dir = os.path.dirname(os.path.dirname(FLAGS.data_dir))
    manipulation_dir += '/manipulation_video/'
    manipulation_folders = glob.glob(manipulation_dir + '*')

    subj = 1
    action = 'manipulation-video'
    test_set = {}
    for folder in manipulation_folders:
        seqname = os.path.basename(folder)
        with h5py.File(folder + '/' + seqname + '.h5', 'r') as h5f:
            poses = h5f['poses'][:]  # copy into memory before the file closes

        # Permute the loaded data to make it compatible with H36M
        poses = poses[:, SH_TO_GT_PERM, :]

        # Flatten each frame to one row, then scatter the values into the
        # columns of a zero matrix with 2 columns per H36M_NAMES entry.
        poses = np.reshape(poses, [poses.shape[0], -1])
        poses_final = np.zeros([poses.shape[0], len(H36M_NAMES) * 2])
        poses_final[:, dim_to_use] = poses
        print(seqname, poses_final.shape)

        # Every exact zero (including the unused columns) is replaced by 0.1.
        poses_final[poses_final == 0.] = 0.1
        test_set[(subj, action, seqname)] = poses_final

    test_set = data_utils.uni_frame_to_bi_frame(test_set)
    test_set_2d = data_utils.normalize_data(test_set, data_mean_2d,
                                            data_std_2d, dim_to_use_2d)

    # NOTE(review): test_set is not read after this loop; kept in case
    # normalize_data shares arrays with its input - confirm and remove.
    for key in test_set.keys():
        test_set[key] = test_set[key][0::2, :]

    # Columns of the un-normalized 3d prediction that are compared against
    # the annotation (36 columns = 12 joints x 3 coordinates).
    dim_to_use_12_manipulation_joints = np.array([
        3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 51,
        52, 53, 54, 55, 56, 57, 58, 59, 75, 76, 77, 78, 79, 80, 81, 82, 83
    ])
    print("Finished Normalize Manipualtion Videos")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(
            config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = FLAGS.batch_size
        model = predict_3dpose_biframe.create_model(sess, actions_all,
                                                    batch_size)
        print("Model loaded")

        # Dropout keep probability 1 for sampling
        dp = 1.0
        max_samples = 3  # the figure has 3 rows, one per sample

        for j, key2d in enumerate(test_set_2d.keys()):
            (subj, action_name, fname) = key2d
            print("Subject: {}, action: {}, fname: {}".format(
                subj, action_name, fname))

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            if n2d == 0:
                # np.array_split rejects zero sections; nothing to predict.
                continue

            # One row per batch
            enc_in = np.array_split(enc_in, n2d)
            all_poses_3d = []
            for bidx in range(len(enc_in)):
                # The 3d target is not available here; model.step still needs
                # an array with 48 columns, so zeros are passed.
                placeholder_3d = np.zeros((enc_in[bidx].shape[0], 48))
                _, _, poses3d = model.step(sess, enc_in[bidx],
                                           placeholder_3d, dp,
                                           isTraining=False)

                # Denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d,
                    dim_to_ignore_2d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, poses3d = map(np.vstack, [enc_in, all_poses_3d])
            poses3d_12_manipulation = poses3d[
                :, dim_to_use_12_manipulation_joints]

            annotated_images = sorted(
                glob.glob(manipulation_dir + fname + '/info/*.xml'))

            plt.figure(j, figsize=(10, 10))
            gs1 = gridspec.GridSpec(3, 3)
            gs1.update(wspace=-0, hspace=0.1)  # spacing between axes
            plt.axis('off')

            # Never index past the available annotations or predictions.
            nsamples = min(max_samples, len(annotated_images),
                           poses3d.shape[0])
            subplot_idx = 1
            for i in range(nsamples):
                annotation = annotated_images[i]

                # Plot 2d Detection
                ax1 = plt.subplot(gs1[subplot_idx - 1])
                img = mpimg.imread(
                    manipulation_dir + fname + '/skeleton_cropped/' +
                    os.path.basename(annotation).split('_')[0] + '.jpg')
                ax1.imshow(img)

                # The middle column shows the same image again.
                ax2 = plt.subplot(gs1[subplot_idx])
                ax2.imshow(img)

                # Align the prediction to the annotation and print the error
                gt = getJ3dPosFromXML(annotation)
                A = poses3d_12_manipulation[i, :].reshape(gt.shape)
                _, _, T, scale, trans = \
                    procrustes.compute_similarity_transform(
                        gt, A, compute_optimal_scale=True)
                joint_dist = np.sqrt(
                    np.sum((gt - (scale * A.dot(T)) - trans)**2, axis=1))
                print("{0} - {1} - Mean Error (mm) : {2}".format(
                    fname, os.path.basename(annotation),
                    np.mean(joint_dist)))

                # Plot 3d predictions (the raw, un-aligned prediction)
                ax3 = plt.subplot(gs1[subplot_idx + 1], projection='3d')
                p3d = poses3d[i, :]
                viz.show3Dpose(p3d, ax3, lcolor="#9b59b6", rcolor="#2ecc71")
                ax3.invert_zaxis()
                ax3.invert_yaxis()

                subplot_idx = subplot_idx + 3

            plt.show()
def test():
    """Evaluate the model action by action and log the errors.

    When ``FLAGS.evaluateActionWise`` is set, the error returned by
    ``evaluate_batches`` is logged for each action, the running average is
    written to ``model.test_writer`` and then logged as well. When the flag is
    unset the model is only created.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    data_3d = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)
    (_train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     dim_to_use_3d, _train_root_positions, _test_root_positions) = data_3d

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        data_2d = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    else:
        data_2d = data_utils.create_2d_data(actions, FLAGS.data_dir, rcams)
    (_train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
     dim_to_use_2d) = data_2d

    # Avoid using the GPU if requested
    if FLAGS.use_cpu:
        device_count = {"GPU": 0}
    else:
        device_count = {"GPU": 1}

    session_config = tf.ConfigProto(device_count=device_count,
                                    allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)

        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        if not FLAGS.evaluateActionWise:
            return

        # Table header: "Action" centred in 12 columns, "mm" in 6.
        logger.info("{0:=^12} {1:=^6}".format("Action", "mm"))

        cum_err = 0
        for action in actions:
            # Get 2d and 3d testing data for this action
            subset_2d = get_action_subset(test_set_2d, action)
            subset_3d = get_action_subset(test_set_3d, action)
            encoder_inputs, decoder_outputs, _repro_info = \
                model.get_all_batches(subset_2d, subset_3d,
                                      FLAGS.camera_frame, training=False)

            act_err, _step_time, _loss = evaluate_batches(
                sess, model,
                data_mean_3d, data_std_3d, dim_to_use_3d, dim_to_ignore_3d,
                data_mean_2d, data_std_2d, dim_to_use_2d, dim_to_ignore_2d,
                current_step, encoder_inputs, decoder_outputs)
            cum_err = cum_err + act_err

            logger.info('{0:<12} {1:>6.2f}'.format(action, act_err))

        # The summary is written before the average is logged.
        mean_err = cum_err / float(len(actions))
        summaries = sess.run(model.err_mm_summary,
                             {model.err_mm: float(mean_err)})
        model.test_writer.add_summary(summaries, current_step)

        logger.info('{0:<12} {1:>6.2f}'.format("Average", mean_err))
        logger.info('{0:=^19}'.format(''))
def sample():
    """Get samples from a model and visualize them.

    For every test sequence whose action is 'SittingDown' the model is run on
    all frames, each of the ``model.num_models`` pose hypotheses is
    un-normalized, and up to 20 randomly permuted frames (fixed seed 42) are
    saved as ``sample_<subj>_<action>_<camera>_<index>.png`` in
    ``<FLAGS.train_dir>/samples_sh``. Each image shows the 2d input, the 3d
    ground truth and one 3d plot per hypothesis.
    """
    path = '{}/samples_sh'.format(FLAGS.train_dir)
    if not os.path.exists(path):
        os.makedirs(path)

    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (_, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     _, _, test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    if FLAGS.use_sh:
        (_, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         _) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    else:
        # In this variant create_2d_data is unpacked into seven values.
        (_, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         _, _) = data_utils.create_2d_data(actions, FLAGS.data_dir, rcams)

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(
            config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        num_models = model.num_models
        # Dropout keep probability 1 for sampling
        dp = 1.0

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d

            # choose SittingDown action to visualize
            if b != 'SittingDown':
                continue
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            key3d = (subj, b, fname[:-3]) if (
                fname.endswith('-sh')) and FLAGS.camera_frame else key3d

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # Split into about-same-size batches. A sequence shorter than one
            # batch would otherwise ask array_split for zero sections, which
            # raises ValueError.
            n_splits = max(1, n2d // batch_size)
            enc_in = np.array_split(enc_in, n_splits)
            dec_out = np.array_split(dec_out, n_splits)

            # One list of per-batch arrays for every pose hypothesis.
            pose_3d_mdm = [[] for _ in range(num_models)]

            for bidx in range(len(enc_in)):
                _, _, out_all_components = model.step(
                    sess, enc_in[bidx], dec_out[bidx], dp, isTraining=False)

                # Keep only the first HUMAN_3D_SIZE entries of every
                # component (the pose); the 2 trailing entries per component
                # are dropped.
                out_all_components = np.reshape(
                    out_all_components,
                    [-1, model.HUMAN_3D_SIZE + 2, num_models])
                out_mean = out_all_components[:, :model.HUMAN_3D_SIZE, :]

                # Denormalize the 2d input, the 3d ground truth and every 3d
                # hypothesis.
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d,
                    dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d,
                    dim_to_ignore_3d)
                for m in range(num_models):
                    pose_3d_mdm[m].append(data_utils.unNormalizeData(
                        out_mean[:, :, m], data_mean_3d, data_std_3d,
                        dim_to_ignore_3d))

            # Put all the poses together
            enc_in, dec_out = map(np.vstack, [enc_in, dec_out])
            for m in range(num_models):
                pose_3d_mdm[m] = np.vstack(pose_3d_mdm[m])

            # Values used in the output file name. In camera frame they are
            # replaced below by the action / camera index of the 3d key.
            action = b
            # NOTE(review): assumes 2d file names look like
            # '<sequence>.<camera>.<ext>', the convention the camera-frame
            # branch below relies on for the 3d key - confirm for world-frame
            # keys.
            name_parts = fname.split('.')
            scam_idx = name_parts[1] if len(name_parts) > 2 else 'world'

            # Convert back to world coordinates
            if FLAGS.camera_frame:
                N_CAMERAS = 4
                N_JOINTS_H36M = 32

                # Add global position back
                root_offset = np.tile(test_root_positions[key3d],
                                      [1, N_JOINTS_H36M])
                dec_out = dec_out + root_offset
                for m in range(num_models):
                    pose_3d_mdm[m] = pose_3d_mdm[m] + root_offset

                # Load the appropriate camera
                subj, action, sname = key3d
                cname = sname.split('.')[1]  # <-- camera name
                # cams of this subject
                scams = {(subj, c + 1): rcams[(subj, c + 1)]
                         for c in range(N_CAMERAS)}
                # index of camera used
                scam_idx = [scams[(subj, c + 1)][-1]
                            for c in range(N_CAMERAS)].index(cname)
                the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                R, T, _, _, _, _, name = the_cam
                assert name == cname

                def cam2world_centered(data_3d_camframe):
                    """Rotate to world frame and subtract the first joint."""
                    data_3d_worldframe = cameras.camera_to_world_frame(
                        data_3d_camframe.reshape((-1, 3)), R, T)
                    data_3d_worldframe = data_3d_worldframe.reshape(
                        (-1, N_JOINTS_H36M * 3))
                    # subtract root translation
                    return data_3d_worldframe - np.tile(
                        data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                # Apply inverse rotation and translation
                dec_out = cam2world_centered(dec_out)
                for m in range(num_models):
                    pose_3d_mdm[m] = cam2world_centered(pose_3d_mdm[m])

            # sample some results to visualize
            np.random.seed(42)
            idx = np.random.permutation(enc_in.shape[0])
            enc_in, dec_out = enc_in[idx, :], dec_out[idx, :]
            for m in range(num_models):
                pose_3d_mdm[m] = pose_3d_mdm[m][idx, :]

            # Rows 1..nsamples of the permuted data are plotted (row 0 is
            # skipped, as before); never run past the available rows.
            nsamples = min(20, enc_in.shape[0] - 1)
            # 7 columns as before (2d, ground truth, 5 hypotheses); wider
            # only if the model has more than 5 components.
            n_columns = max(7, 2 + num_models)
            for exidx in range(1, nsamples + 1):
                fig = plt.figure(figsize=(20, 5))

                gs1 = gridspec.GridSpec(1, n_columns)
                gs1.update(wspace=-0.00, hspace=0.05)  # spacing between axes
                plt.axis('off')

                # Plot 2d pose
                ax1 = plt.subplot(gs1[0])
                viz.show2Dpose(enc_in[exidx, :], ax1)
                ax1.invert_yaxis()

                # Plot 3d gt
                ax2 = plt.subplot(gs1[1], projection='3d')
                viz.show3Dpose(dec_out[exidx, :], ax2)

                # Plot 3d pose hypotheses
                for m in range(num_models):
                    ax3 = plt.subplot(gs1[2 + m], projection='3d')
                    viz.show3Dpose(pose_3d_mdm[m][exidx], ax3,
                                   lcolor="#9b59b6", rcolor="#2ecc71")

                plt.savefig('{}/sample_{}_{}_{}_{}.png'.format(
                    path, subj, action, scam_idx, exidx))
                plt.close(fig)
def sample():
    """Get samples from a model and visualize them.

    Runs the model on every test sequence, un-normalizes inputs, targets and
    predictions (converting the 3d data to a root-centred world frame when
    ``FLAGS.camera_frame`` is set) and shows up to 15 randomly permuted frames
    as 2d input / 3d ground truth / 3d prediction triplets.
    """
    # Local import kept from the original: the rest of the module may not
    # import gridspec at the top level.
    import matplotlib.gridspec as gridspec

    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (_, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d,
     _, _, test_root_positions) = data_utils.read_3d_data(
         actions, FLAGS.data_dir, FLAGS.camera_frame, rcams,
         FLAGS.predict_14)

    if FLAGS.use_sh:
        (_, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         _) = data_utils.read_2d_predictions(actions, FLAGS.data_dir)
    else:
        (_, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d,
         _) = data_utils.create_2d_data(actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(
            config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        # Dropout keep probability 1 for sampling
        dp = 1.0

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            key3d = (subj, b, fname[:-3]) if (
                fname.endswith('-sh')) and FLAGS.camera_frame else key3d

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # Split into about-same-size batches. A sequence shorter than one
            # batch would otherwise ask array_split for zero sections, which
            # raises ValueError.
            n_splits = max(1, n2d // batch_size)
            enc_in = np.array_split(enc_in, n_splits)
            dec_out = np.array_split(dec_out, n_splits)

            all_poses_3d = []
            for bidx in range(len(enc_in)):
                _, _, poses3d = model.step(sess, enc_in[bidx], dec_out[bidx],
                                           dp, isTraining=False)

                # denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d,
                    dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d,
                    dim_to_ignore_3d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, dec_out, poses3d = map(
                np.vstack, [enc_in, dec_out, all_poses_3d])

            # Convert back to world coordinates
            if FLAGS.camera_frame:
                N_CAMERAS = 4
                N_JOINTS_H36M = 32

                # Add global position back (ground truth only, as before)
                dec_out = dec_out + np.tile(test_root_positions[key3d],
                                            [1, N_JOINTS_H36M])

                # Load the appropriate camera
                subj, _, sname = key3d
                cname = sname.split('.')[1]  # <-- camera name
                scams = {(subj, c + 1): rcams[(subj, c + 1)]
                         for c in range(N_CAMERAS)}
                scam_idx = [scams[(subj, c + 1)][-1]
                            for c in range(N_CAMERAS)].index(cname)
                the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                R, T, _, _, _, _, name = the_cam
                assert name == cname

                def cam2world_centered(data_3d_camframe):
                    """Rotate to world frame and subtract the first joint."""
                    data_3d_worldframe = camera_to_world_frame(
                        data_3d_camframe.reshape((-1, 3)), R, T)
                    data_3d_worldframe = data_3d_worldframe.reshape(
                        (-1, N_JOINTS_H36M * 3))
                    # subtract root translation
                    return data_3d_worldframe - np.tile(
                        data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                # Apply inverse rotation and translation
                dec_out = cam2world_centered(dec_out)
                poses3d = cam2world_centered(poses3d)

    # Grab a random batch to visualize.
    # NOTE(review): only the arrays left over from the last sequence processed
    # above are plotted.
    idx = np.random.permutation(enc_in.shape[0])
    enc_in, dec_out, poses3d = enc_in[idx, :], dec_out[idx, :], poses3d[idx, :]

    # 1080p = 1,920 x 1,080
    plt.figure(figsize=(19.2, 10.8))
    gs1 = gridspec.GridSpec(5, 9)  # 5 rows, 9 columns -> 15 triplets
    gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
    plt.axis('off')

    # Rows 1..nsamples of the permuted data are plotted (row 0 is skipped, as
    # before); never run past the available rows.
    nsamples = min(15, enc_in.shape[0] - 1)
    subplot_idx = 1
    for exidx in range(1, nsamples + 1):
        # Plot 2d pose
        ax1 = plt.subplot(gs1[subplot_idx - 1])
        viz.show2Dpose(enc_in[exidx, :], ax1)
        ax1.invert_yaxis()

        # Plot 3d gt
        ax2 = plt.subplot(gs1[subplot_idx], projection='3d')
        viz.show3Dpose(dec_out[exidx, :], ax2)

        # Plot 3d predictions
        ax3 = plt.subplot(gs1[subplot_idx + 1], projection='3d')
        viz.show3Dpose(poses3d[exidx, :], ax3,
                       lcolor="#9b59b6", rcolor="#2ecc71")

        subplot_idx = subplot_idx + 3

    plt.show()
def mpi():
    """Evaluate the model on the MPI test annotations and print error metrics.

    Loads the training data (its 2d/3d means are passed on to
    ``data_utils.read_mpi``), builds the model, runs every full batch of the
    MPI test set through it and prints the average step time, the average
    loss, the mean per-joint distance, a PCK value at a threshold of 150 and
    the script's "AUC" value, followed by the per-joint errors.

    Examples at the end of the test set that do not fill a whole batch are
    dropped.

    Raises:
        ValueError: if the MPI test set holds fewer examples than one batch.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d,
     train_root_positions, test_root_positions) = data_utils.read_3d_data(
        actions,
        FLAGS.data_dir + 'train_images.txt',
        FLAGS.data_dir + 'valid_images.txt',
        FLAGS.data_dir + 'train.h5',
        FLAGS.data_dir + 'valid.h5',
        FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
            actions,
            FLAGS.data_dir + 'train_images.txt',
            FLAGS.data_dir + 'valid_images.txt',
            FLAGS.data_dir + 'train.h5',
            FLAGS.data_dir + 'valid.h5')
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
            actions,
            FLAGS.data_dir + 'train_images.txt',
            FLAGS.data_dir + 'valid_images.txt',
            FLAGS.data_dir + 'train.h5',
            FLAGS.data_dir + 'valid.h5',
            rcams)
    print("done reading and normalizing data.")

    # Load MPI data for testing
    # NOTE(review): the annotation path is hard-coded to one machine.
    (mpi_test3d, mpi_mean3d, mpi_std3d,
     mpi_test2d, mpi_mean2d, mpi_std2d) = data_utils.read_mpi(
        '/mnt/lustre/xingyifei/test_3dhp/annotTest.h5', True,
        data_mean_2d, data_mean_3d)

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # === Create the model ===
        print("Creating %d bi-layers of %d units." % (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        n_joints = 16 if not (FLAGS.predict_14) else 14

        # Cut the test set into full batches.  The slice bound is computed
        # explicitly so that a test set whose size is an exact multiple of
        # the batch size is handled as well.
        n = len(mpi_test3d)
        n_batches = n // model.batch_size
        if n_batches == 0:
            raise ValueError(
                "MPI test set has {0} examples, fewer than one batch of {1}".format(
                    n, model.batch_size))
        n_usable = n_batches * model.batch_size
        encoder_inputs = np.split(
            mpi_test2d[:n_usable, :, :].reshape(-1, 32), n_batches)
        decoder_outputs = np.split(
            mpi_test3d[:n_usable, :, :].reshape(-1, 48), n_batches)
        nbatches = len(encoder_inputs)

        # Loop through test examples
        all_dists, start_time, loss = [], time.time(), 0.
        for enc_in, dec_out in zip(encoder_inputs, decoder_outputs):
            dp = 1.0  # dropout keep probability is always 1 at test time
            step_loss, _, poses3d = model.step(sess, enc_in, dec_out, dp, isTraining=False)
            loss += step_loss

            # denormalize targets and predictions with the MPI statistics
            dec_out = (dec_out.reshape(-1, 16, 3) * mpi_std3d + mpi_mean3d).reshape(-1, 48)
            poses3d = (poses3d.reshape(-1, 16, 3) * mpi_std3d + mpi_mean3d).reshape(-1, 48)

            assert dec_out.shape[0] == FLAGS.batch_size
            assert poses3d.shape[0] == FLAGS.batch_size

            if FLAGS.procrustes:
                # Apply per-frame procrustes alignment if asked to do so
                for j in range(FLAGS.batch_size):
                    gt = np.reshape(dec_out[j, :], [-1, 3])
                    out = np.reshape(poses3d[j, :], [-1, 3])
                    _, _, T, b, c = procrustes.compute_similarity_transform(
                        gt, out, compute_optimal_scale=True)
                    out = (b * out.dot(T)) + c
                    poses3d[j, :] = np.reshape(out, [-1, 16 * 3]) if not (
                        FLAGS.predict_14) else np.reshape(out, [-1, 14 * 3])

            # Euclidean distance per joint: sum the squared error over each
            # consecutive (x, y, z) triple of the first n_joints joints.
            sqerr = (poses3d - dec_out) ** 2
            dists = np.sqrt(
                sqerr[:, :n_joints * 3].reshape(sqerr.shape[0], n_joints, 3).sum(axis=2))
            all_dists.append(dists)

            assert sqerr.shape[0] == FLAGS.batch_size

        step_time = (time.time() - start_time) / nbatches
        loss = loss / nbatches

        all_dists = np.vstack(all_dists)
        PCK_150 = all_dists < 150

        # "AUC" value as defined by this script: per example, TP is the
        # fraction of joints under the threshold and FP the fraction over it.
        non_zero = np.count_nonzero(PCK_150, axis=1) * 1.
        zeros = np.ones(non_zero.shape) * len(PCK_150[0]) - non_zero
        TP = non_zero / len(PCK_150[0])
        FP = zeros / len(PCK_150[0])
        AUC = (1 + TP - FP) / 2.
        AUC = np.mean(AUC)

        # PCK@150
        PCK = np.mean(np.mean(PCK_150, axis=1))

        # Error per joint and total for all passed batches
        joint_err = np.mean(all_dists, axis=0)
        total_err = np.mean(all_dists)

        print("=============================\n"
              "Step-time (ms): %.4f\n"
              "Val loss avg: %.4f\n"
              "Val error avg (mm): %.2f\n"
              "=============================" % (1000 * step_time, loss, total_err))
        print("PCK ERROR: " + str(PCK))
        print("AUC ERROR: " + str(AUC))

        for i in range(n_joints):
            # width 5, right-aligned, 2 decimal places
            print("Error in joint {0:02d} (mm): {1:>5.2f}".format(i + 1, joint_err[i]))
        print("=============================")
def test():
    """Evaluate the model on the test set, either per action or overall.

    With ``FLAGS.evaluateActionWise`` set, one error per action and their
    average are printed and the average is written to the test summary
    writer.  Otherwise the overall step time, loss and error are printed,
    followed by the error of every joint.
    """
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    actions = data_utils.define_actions(FLAGS.action)
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # No training happens here: evaluate_batches is called with step 0, and
    # the summary below is written at that same step.
    current_step = 0

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d,
     train_root_positions, test_root_positions,
     augmented3d, train_set_3d_for_noisy,
     data_mean_3d_test, data_std_3d_test) = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14,
        FLAGS.augment_all, FLAGS.augment_rot, FLAGS.augment_flip,
        FLAGS.augment_trans, FLAGS.add_noise, FLAGS.add_kinematics)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
        # read_2d_predictions is unpacked into six values only, so there are
        # no separate test statistics on this path.
        # NOTE(review): falling back to the statistics returned above is an
        # assumption - confirm this is the intended normalization.
        data_mean_2d_test, data_std_2d_test = data_mean_2d, data_std_2d
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d,
         train_set_2d_for_noisy,
         data_mean_2d_test, data_std_2d_test) = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams, augmented3d)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          gpu_options=gpu_options)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = FLAGS.batch_size
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        if FLAGS.evaluateActionWise:
            print("{0:=^12} {1:=^6}".format("Action", "mm"))  # table header

            cum_err = 0
            for action in actions:
                print("{0:<12} ".format(action), end="")

                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    action_test_set_2d, action_test_set_3d,
                    FLAGS.camera_frame, training=False)

                act_err, _, step_time, loss = evaluate_batches(
                    sess, model,
                    data_mean_3d_test, data_std_3d_test, dim_to_use_3d, dim_to_ignore_3d,
                    data_mean_2d_test, data_std_2d_test, dim_to_use_2d, dim_to_ignore_2d,
                    current_step, encoder_inputs, decoder_outputs)
                cum_err = cum_err + act_err
                print("{0:>6.2f}".format(act_err))

            # Log the average error over all actions
            summaries = sess.run(
                model.err_mm_summary,
                {model.err_mm: float(cum_err / float(len(actions)))})
            model.test_writer.add_summary(summaries, current_step)

            print("{0:<12} {1:>6.2f}".format("Average", cum_err / float(len(actions))))
            print("{0:=^19}".format(''))
        else:
            n_joints = 17 if not (FLAGS.predict_14) else 14
            encoder_inputs, decoder_outputs = model.get_all_batches(
                test_set_2d, test_set_3d, FLAGS.camera_frame, training=False)

            total_err, joint_err, step_time, loss = evaluate_batches(
                sess, model,
                data_mean_3d_test, data_std_3d_test, dim_to_use_3d, dim_to_ignore_3d,
                data_mean_2d_test, data_std_2d_test, dim_to_use_2d, dim_to_ignore_2d,
                current_step, encoder_inputs, decoder_outputs, FLAGS.epochs)

            print("=============================\n"
                  "Step-time (ms): %.4f\n"
                  "Val loss avg: %.4f\n"
                  "Val error avg (mm): %.2f\n"
                  "=============================" % (1000 * step_time, loss, total_err))

            for i in range(n_joints):
                # width 5, right-aligned, 2 decimal places
                print("Error in joint {0:02d} (mm): {1:>5.2f}".format(i + 1, joint_err[i]))
            print("=============================")
def train():
    """Train the model for ``FLAGS.epochs`` epochs.

    After every epoch the model is evaluated on the test set (per action when
    ``FLAGS.evaluateActionWise`` is set, otherwise over all data at once), the
    error is written to the test summary writer and a checkpoint is saved
    under ``train_dir``.
    """
    actions = data_utils.define_actions(FLAGS.action)
    number_of_actions = len(actions)

    # Camera parameters of all subjects
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # 3d poses plus their normalization statistics
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d,
     train_root_positions, test_root_positions) = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # 2d inputs come from data_utils.read_2d_predictions when FLAGS.use_sh is
    # set and from data_utils.create_2d_data otherwise
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    session_config = tf.ConfigProto(device_count=device_count, allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        print("Creating %d bi-layers of %d units." % (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)
        print("Model created")

        # Bookkeeping for the training loop
        val_loss = 0.0
        step_time, loss = 0, 0
        current_step = FLAGS.load + 1 if FLAGS.load > 0 else 0
        previous_losses = []
        current_epoch = 0
        log_every_n_batches = 100

        for _ in xrange(FLAGS.epochs):
            current_epoch += 1

            # === Training batches of this epoch ===
            encoder_inputs, decoder_outputs = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame, training=True)
            nbatches = len(encoder_inputs)
            print("There are {0} train batches".format(nbatches))
            start_time, loss = time.time(), 0.

            for batch_idx, enc_in in enumerate(encoder_inputs):
                dec_out = decoder_outputs[batch_idx]
                batch_no = batch_idx + 1
                is_log_step = batch_no % log_every_n_batches == 0

                if is_log_step:
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, batch_no, nbatches), end="")

                step_loss, loss_summary, lr_summary, _ = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining=True)

                if is_log_step:
                    # Write the summaries and report the time per batch since
                    # the previous report
                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    now = time.time()
                    step_time = now - start_time
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(
                        1000 * step_time / log_every_n_batches))

                loss += step_loss
                current_step += 1

            loss = loss / nbatches
            print("=============================\n"
                  "Global step: %d\n"
                  "Learning rate: %.2e\n"
                  "Train loss avg: %.4f\n"
                  "=============================" % (
                      model.global_step.eval(), model.learning_rate.eval(), loss))

            # === Evaluation after this epoch ===
            isTraining = False

            if not FLAGS.evaluateActionWise:
                n_joints = 14 if FLAGS.predict_14 else 17
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    test_set_2d, test_set_3d, FLAGS.camera_frame, training=False)

                total_err, joint_err, step_time, loss = evaluate_batches(
                    sess, model,
                    data_mean_3d, data_std_3d, dim_to_use_3d, dim_to_ignore_3d,
                    data_mean_2d, data_std_2d, dim_to_use_2d, dim_to_ignore_2d,
                    current_step, encoder_inputs, decoder_outputs, current_epoch)

                print("=============================\n"
                      "Step-time (ms): %.4f\n"
                      "Val loss avg: %.4f\n"
                      "Val error avg (mm): %.2f\n"
                      "=============================" % (1000 * step_time, loss, total_err))

                for joint_idx in range(n_joints):
                    print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                        joint_idx + 1, joint_err[joint_idx]))
                print("=============================")

                # Log the overall error
                summaries = sess.run(model.err_mm_summary, {model.err_mm: total_err})
                model.test_writer.add_summary(summaries, current_step)
            else:
                print("{0:=^12} {1:=^6}".format("Action", "mm"))

                cum_err = 0
                for action in actions:
                    print("{0:<12} ".format(action), end="")

                    # Test data restricted to this action
                    action_test_set_2d = get_action_subset(test_set_2d, action)
                    action_test_set_3d = get_action_subset(test_set_3d, action)
                    encoder_inputs, decoder_outputs = model.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False)

                    act_err, _, step_time, loss = evaluate_batches(
                        sess, model,
                        data_mean_3d, data_std_3d, dim_to_use_3d, dim_to_ignore_3d,
                        data_mean_2d, data_std_2d, dim_to_use_2d, dim_to_ignore_2d,
                        current_step, encoder_inputs, decoder_outputs)
                    cum_err = cum_err + act_err
                    print("{0:>6.2f}".format(act_err))

                # Log and print the average over all actions
                mean_action_err = cum_err / float(len(actions))
                summaries = sess.run(
                    model.err_mm_summary, {model.err_mm: float(mean_action_err)})
                model.test_writer.add_summary(summaries, current_step)
                print("{0:<12} {1:>6.2f}".format("Average", mean_action_err))
                print("{0:=^19}".format(''))

            # === Checkpoint ===
            print("Saving the model... ", end="")
            start_time = time.time()
            checkpoint_path = os.path.join(train_dir, 'checkpoint')
            model.saver.save(sess, checkpoint_path, global_step=current_step)
            print("done in {0:.2f} ms".format(1000 * (time.time() - start_time)))

            # Reset the timers and the loss for the next epoch
            step_time, loss = 0, 0
            sys.stdout.flush()
def sample():
    """Get samples from a model and visualize them.

    Every test sequence is fed through the network in batches and
    un-normalized.  When ``FLAGS.camera_frame`` is set, ground truth and
    predictions are additionally converted back to world coordinates and
    re-centred on the root joint.  A 5 x 9 grid is then drawn that shows, for
    15 randomly chosen frames, the 2d input, the 3d ground truth and the 3d
    prediction.

    The per-sequence arrays are rebound on every loop iteration, so the frames
    that end up in the plot come from the sequence that was processed last.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d,
     train_root_positions, test_root_positions) = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            key3d = (subj, b, fname[:-3]) if (
                fname.endswith('-sh')) and FLAGS.camera_frame else key3d

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # Split into about-same-size batches.  np.array_split rejects a
            # section count of 0, which is what n2d // batch_size yields for a
            # sequence shorter than one batch, so use at least one section.
            n_batches = max(1, n2d // batch_size)
            enc_in = np.array_split(enc_in, n_batches)
            dec_out = np.array_split(dec_out, n_batches)
            all_poses_3d = []

            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                _, _, poses3d = model.step(
                    sess, enc_in[bidx], dec_out[bidx], dp, isTraining=False)

                # denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d, dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d, dim_to_ignore_3d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, dec_out, poses3d = map(np.vstack, [enc_in, dec_out, all_poses_3d])

            # Convert back to world coordinates
            if FLAGS.camera_frame:
                N_CAMERAS = 4
                N_JOINTS_H36M = 32

                # Add global position back
                dec_out = dec_out + np.tile(test_root_positions[key3d], [1, N_JOINTS_H36M])

                # Load the appropriate camera
                subj, _, sname = key3d
                cname = sname.split('.')[1]  # <-- camera name
                # cams of this subject
                scams = {(subj, c + 1): rcams[(subj, c + 1)] for c in range(N_CAMERAS)}
                # index of camera used
                scam_idx = [scams[(subj, c + 1)][-1] for c in range(N_CAMERAS)].index(cname)
                the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                R, T, f, c, k, p, name = the_cam
                assert name == cname

                def cam2world_centered(data_3d_camframe):
                    """Map camera-frame poses to the world frame, centred on the root."""
                    data_3d_worldframe = cameras.camera_to_world_frame(
                        data_3d_camframe.reshape((-1, 3)), R, T)
                    data_3d_worldframe = data_3d_worldframe.reshape((-1, N_JOINTS_H36M * 3))
                    # subtract root translation
                    return data_3d_worldframe - np.tile(
                        data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                # Apply inverse rotation and translation
                dec_out = cam2world_centered(dec_out)
                poses3d = cam2world_centered(poses3d)

    # Grab a random batch to visualize
    enc_in, dec_out, poses3d = map(np.vstack, [enc_in, dec_out, poses3d])
    idx = np.random.permutation(enc_in.shape[0])
    enc_in, dec_out, poses3d = enc_in[idx, :], dec_out[idx, :], poses3d[idx, :]

    # Visualize random samples
    import matplotlib.gridspec as gridspec

    # 1080p = 1,920 x 1,080
    plt.figure(figsize=(19.2, 10.8))

    gs1 = gridspec.GridSpec(5, 9)  # 5 rows, 9 columns
    gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
    plt.axis('off')

    subplot_idx, exidx = 1, 1
    nsamples = 15
    for _ in range(nsamples):
        # Plot 2d pose
        ax1 = plt.subplot(gs1[subplot_idx - 1])
        p2d = enc_in[exidx, :]
        viz.show2Dpose(p2d, ax1)
        ax1.invert_yaxis()

        # Plot 3d gt
        ax2 = plt.subplot(gs1[subplot_idx], projection='3d')
        p3d = dec_out[exidx, :]
        viz.show3Dpose(p3d, ax2)

        # Plot 3d predictions
        ax3 = plt.subplot(gs1[subplot_idx + 1], projection='3d')
        p3d = poses3d[exidx, :]
        viz.show3Dpose(p3d, ax3, lcolor="#9b59b6", rcolor="#2ecc71")

        exidx = exidx + 1
        subplot_idx = subplot_idx + 3

    plt.show()
def train():
    """Train a linear model for 3d pose estimation.

    Runs ``FLAGS.epochs`` epochs.  After every epoch the model is evaluated on
    the test set (per action when ``FLAGS.evaluateActionWise`` is set,
    otherwise over all data at once), the error is written to the test summary
    writer and a checkpoint is saved under ``train_dir``.
    """
    actions = data_utils.define_actions(FLAGS.action)  # list of the selected actions

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]  # 1,5,6,7,8 for train, 9,11 for test
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections.
    # Author's note: train_set_3d is a dict keyed by (subject id, action,
    # file name) whose values are pose arrays of shape (n, 96).
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d,
     train_root_positions, test_root_positions) = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use
    # groundtruth 2D projections
    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)
    # The test set is a collection of sequences rather than a single array,
    # so report how many sequences it holds instead of asking for a .shape.
    print("done reading and normalizing data", len(test_set_2d))

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # === Create the model ===
        # Author's note: prints e.g. "Creating 2 bi-layers of 1024 units."
        print("Creating %d bi-layers of %d units." % (FLAGS.num_layers, FLAGS.linear_size))
        # Author's note: FLAGS.batch_size was 64 in the author's runs
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)  # add the graph to TensorBoard
        print("Model created")

        # === This is the training loop ===
        # Continue the step count after FLAGS.load when a checkpoint is loaded
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1
        step_time, loss = 0, 0
        current_epoch = 0
        log_every_n_batches = 100  # report progress every 100 batches

        for _ in xrange(FLAGS.epochs):
            current_epoch = current_epoch + 1

            # === Load training batches for one epoch ===
            # Author's note: inputs and outputs are still 32- and
            # 48-dimensional here; get_all_batches cuts them into lists of
            # per-batch arrays.
            encoder_inputs, decoder_outputs = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame, training=True)
            nbatches = len(encoder_inputs)
            # Author's note: 24371 batches in the author's run
            print("There are {0} train batches".format(nbatches))
            start_time, loss = time.time(), 0.

            # === Loop through all the training batches ===
            for i in range(nbatches):
                if (i + 1) % log_every_n_batches == 0:
                    # Print progress every log_every_n_batches batches
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, i + 1, nbatches), end="")

                # Author's note: batch shapes were (64, 32) and (64, 48)
                enc_in, dec_out = encoder_inputs[i], decoder_outputs[i]
                # Feed the batch to the model
                step_loss, loss_summary, lr_summary, _ = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining=True)

                if (i + 1) % log_every_n_batches == 0:
                    # Write the summaries to TensorBoard and print progress
                    # every log_every_n_batches batches
                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    step_time = (time.time() - start_time)
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(
                        1000 * step_time / log_every_n_batches))

                loss += step_loss
                current_step += 1
            # === end looping through training batches ===

            loss = loss / nbatches
            print("=============================\n"
                  "Global step: %d\n"
                  "Learning rate: %.2e\n"
                  "Train loss avg: %.4f\n"
                  "=============================" % (
                      model.global_step.eval(), model.learning_rate.eval(), loss))
            # === End training for an epoch ===

            # === Testing after this epoch ===
            if FLAGS.evaluateActionWise:
                # Header row: "===Action=== ==mm=="
                print("{0:=^12} {1:=^6}".format("Action", "mm"))

                cum_err = 0
                for action in actions:
                    print("{0:<12} ".format(action), end="")

                    # Get 2d and 3d testing data for this action and cut it
                    # into batches (training=False is passed for evaluation)
                    action_test_set_2d = get_action_subset(test_set_2d, action)
                    action_test_set_3d = get_action_subset(test_set_3d, action)
                    encoder_inputs, decoder_outputs = model.get_all_batches(
                        action_test_set_2d, action_test_set_3d,
                        FLAGS.camera_frame, training=False)

                    act_err, _, step_time, loss = evaluate_batches(
                        sess, model,
                        data_mean_3d, data_std_3d, dim_to_use_3d, dim_to_ignore_3d,
                        data_mean_2d, data_std_2d, dim_to_use_2d, dim_to_ignore_2d,
                        current_step, encoder_inputs, decoder_outputs)
                    cum_err = cum_err + act_err
                    print("{0:>6.2f}".format(act_err))

                # Log the average error over all actions
                summaries = sess.run(
                    model.err_mm_summary,
                    {model.err_mm: float(cum_err / float(len(actions)))})
                model.test_writer.add_summary(summaries, current_step)
                print("{0:<12} {1:>6.2f}".format("Average", cum_err / float(len(actions))))
                print("{0:=^19}".format(''))
            else:
                n_joints = 17 if not (FLAGS.predict_14) else 14
                encoder_inputs, decoder_outputs = model.get_all_batches(
                    test_set_2d, test_set_3d, FLAGS.camera_frame, training=False)

                total_err, joint_err, step_time, loss = evaluate_batches(
                    sess, model,
                    data_mean_3d, data_std_3d, dim_to_use_3d, dim_to_ignore_3d,
                    data_mean_2d, data_std_2d, dim_to_use_2d, dim_to_ignore_2d,
                    current_step, encoder_inputs, decoder_outputs, current_epoch)

                print("=============================\n"
                      "Step-time (ms): %.4f\n"
                      "Val loss avg: %.4f\n"
                      "Val error avg (mm): %.2f\n"
                      "=============================" % (1000 * step_time, loss, total_err))

                for i in range(n_joints):
                    # width 5, right-aligned, 2 decimal places
                    print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                        i + 1, joint_err[i]))
                print("=============================")

                # Log the error to tensorboard
                summaries = sess.run(model.err_mm_summary, {model.err_mm: total_err})
                model.test_writer.add_summary(summaries, current_step)

            # Save the model
            print("Saving the model... ", end="")
            start_time = time.time()
            model.saver.save(sess, os.path.join(train_dir, 'checkpoint'),
                             global_step=current_step)
            print("done in {0:.2f} ms".format(1000 * (time.time() - start_time)))

            # Reset global time and loss
            step_time, loss = 0, 0

            sys.stdout.flush()
def predict(convert_to_world):
    """Run the model and predict pose data.

    Args:
        convert_to_world: flag indicating whether to convert the data back to
            world coordinates from the camera frame.  The conversion is only
            carried out when ``FLAGS.camera_frame`` is set as well.

    Returns:
        A tuple ``(enc_in, dec_out, poses3d)`` of un-normalized 2d inputs, 3d
        ground truth and 3d output for the test sequence that was processed
        last.  In ``poses3d`` only every third column starting at column 1 is
        taken from the network output; all other columns are copied from
        ``dec_out``.
    """
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    (train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
     dim_to_ignore_3d, dim_to_use_3d,
     train_root_positions, test_root_positions) = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    if FLAGS.use_sh:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        (train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
         dim_to_ignore_2d, dim_to_use_2d) = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)
    print("done reading and normalizing data.")

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.linear_size))
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        for key2d in test_set_2d.keys():
            (subj, b, fname) = key2d
            print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

            # keys should be the same if 3d is in camera coordinates
            key3d = key2d if FLAGS.camera_frame else (
                subj, b, '{0}.h5'.format(fname.split('.')[0]))
            key3d = (subj, b, fname[:-3]) if (
                fname.endswith('-sh')) and FLAGS.camera_frame else key3d

            enc_in = test_set_2d[key2d]
            n2d, _ = enc_in.shape
            dec_out = test_set_3d[key3d]
            n3d, _ = dec_out.shape
            assert n2d == n3d

            # np.array_split rejects a section count of 0, which is what
            # n2d // batch_size yields for a sequence shorter than one batch,
            # so use at least one section.  The same count is used for the
            # inputs, the targets and the loss pairs to keep them aligned.
            n_batches = max(1, n2d // batch_size)

            # Generate the loss pairs: for every frame, draw
            # model.num_loss_pairs distinct index pairs (i < j) at random
            loss_pairs = None
            if model.num_loss_pairs:
                num_pts = int(model.HUMAN_3D_SIZE / 3)
                pairs = np.asarray([(i, j)
                                    for i in range(num_pts)
                                    for j in range(num_pts) if i < j])
                pair_idxs = [
                    np.random.choice(len(pairs), model.num_loss_pairs, replace=False)
                    for _ in range(n3d)
                ]
                loss_pairs = np.take(pairs, pair_idxs, axis=0)
                loss_pairs = np.array_split(loss_pairs, n_batches)

            # Split into about-same-size batches
            enc_in = np.array_split(enc_in, n_batches)
            dec_out = np.array_split(dec_out, n_batches)
            all_poses_3d = []

            for bidx in range(len(enc_in)):
                # Dropout probability 0 (keep probability 1) for sampling
                dp = 1.0
                if model.num_loss_pairs:
                    _, _, poses3d = model.step(sess, enc_in[bidx], dec_out[bidx], dp,
                                               loss_pairs=loss_pairs[bidx],
                                               isTraining=False)
                else:
                    _, _, poses3d = model.step(sess, enc_in[bidx], dec_out[bidx], dp,
                                               isTraining=False)

                # denormalize
                enc_in[bidx] = data_utils.unNormalizeData(
                    enc_in[bidx], data_mean_2d, data_std_2d, dim_to_ignore_2d)
                dec_out[bidx] = data_utils.unNormalizeData(
                    dec_out[bidx], data_mean_3d, data_std_3d, dim_to_ignore_3d)
                poses3d = data_utils.unNormalizeData(
                    poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
                all_poses_3d.append(poses3d)

            # Put all the poses together
            enc_in, dec_out, poses3d = map(np.vstack, [enc_in, dec_out, all_poses_3d])

            if convert_to_world:
                # Convert back to world coordinates
                if FLAGS.camera_frame:
                    N_CAMERAS = 4
                    N_JOINTS_H36M = 32

                    # Add global position back
                    dec_out = dec_out + np.tile(test_root_positions[key3d],
                                                [1, N_JOINTS_H36M])

                    # Load the appropriate camera
                    subj, _, sname = key3d
                    cname = sname.split('.')[1]  # <-- camera name
                    # cams of this subject
                    scams = {(subj, c + 1): rcams[(subj, c + 1)]
                             for c in range(N_CAMERAS)}
                    # index of camera used
                    scam_idx = [
                        scams[(subj, c + 1)][-1] for c in range(N_CAMERAS)
                    ].index(cname)
                    the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
                    R, T, f, c, k, p, name = the_cam
                    assert name == cname

                    def cam2world_centered(data_3d_camframe):
                        """Map camera-frame poses to the world frame, centred on the root."""
                        data_3d_worldframe = cameras.camera_to_world_frame(
                            data_3d_camframe.reshape((-1, 3)), R, T)
                        data_3d_worldframe = data_3d_worldframe.reshape(
                            (-1, N_JOINTS_H36M * 3))
                        # subtract root translation
                        return data_3d_worldframe - np.tile(
                            data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                    # Apply inverse rotation and translation
                    dec_out = cam2world_centered(dec_out)
                    poses3d = cam2world_centered(poses3d)

    # NOTE(review): the nesting of the merge and the return below was
    # reconstructed; confirm they are meant to run once, after all sequences
    # have been processed, and regardless of convert_to_world.
    # Keep the ground truth everywhere except in every third column starting
    # at column 1, which is taken from the network output.
    poses3dnew = dec_out.copy()
    poses3dnew[:, 1::3] = poses3d[:, 1::3]
    poses3d = poses3dnew

    return enc_in, dec_out, poses3d
# Load data
data_dir = './data/h36m/'
camera_frame = True
predict_14 = False

# Load 3d data and load (or create) 2d projections
(train_set_3d, test_set_3d, data_mean_3d, data_std_3d,
 dim_to_ignore_3d, dim_to_use_3d,
 train_root_positions, test_root_positions) = data_utils.read_3d_data(
    actions, data_dir, camera_frame, rcams, predict_14)

# Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth
# 2D projections
use_sh = False  # use stacked hourglass detections
(train_set_2d, test_set_2d, data_mean_2d, data_std_2d,
 dim_to_ignore_2d, dim_to_use_2d) = (
    data_utils.read_2d_predictions(actions, data_dir)
    if use_sh
    else data_utils.create_2d_data(actions, data_dir, rcams)
)
print("done reading and normalizing data.")

# Statistics of the 3d data, collected in a single dict
stat_3d = {
    'mean': data_mean_3d,
    'std': data_std_3d,
    'dim_use': dim_to_use_3d,
}


# ============================
#   Define Train/Test Methods
# ============================
def train(train_loader, model, criterion, optimizer,
          lr_init=None, lr_now=None, glob_step=None, lr_decay=None,
          gamma=None, max_norm=True):
    """Training routine, of which only the first statement is present here.

    NOTE(review): the body stops after creating the loss meter; the rest of
    the routine appears to be missing from this file - confirm against the
    original source before relying on it.
    """
    losses = utils.AverageMeter()
def _interpolate_keypoints(smoothed, multiplier, smooth_resamp=125, joint_rows=36):
    """Resample every 2D keypoint curve with a smoothing cubic spline.

    Args:
        smoothed: dict mapping frame -> sequence of ``joint_rows`` numbers.
            Values are consumed in the dict's iteration order, which is
            treated as the frame order.
        multiplier: step between resampled frame positions, passed to
            ``np.arange(0, n_frames, multiplier)``.
        smooth_resamp: factor applied to each curve's value range to obtain
            the spline smoothing factor.
        joint_rows: number of values stored per frame.

    Returns:
        dict mapping a new consecutive frame index (starting at 0) to the
        list of ``joint_rows`` resampled values for that frame.
    """
    framerange = len(smoothed)
    keypoints = np.reshape(np.concatenate(list(smoothed.values())),
                           (framerange, joint_rows))
    frame_positions = np.arange(framerange)
    frame_resampled = np.arange(0, framerange, multiplier)

    curves = []
    for row in range(joint_rows):
        curve = keypoints[:, row]
        spl = UnivariateSpline(frame_positions, curve, k=3)
        # relative smooth factor based on jnt anim curve
        smooth_fac = (curve.max() - curve.min()) * smooth_resamp
        spl.set_smoothing_factor(float(smooth_fac))
        curves.append(spl(frame_resampled))

    # One row per resampled frame. The frame count is taken from the data
    # itself (len(frame_resampled)) instead of int(framerange / multiplier):
    # the two can differ by one, which previously made joints be read from
    # misaligned offsets of the flattened spline output.
    resampled = np.array(curves).T
    return {frame: list(values) for frame, values in enumerate(resampled)}


def _reorient_pose(poses3d, spine_x, spine_y, n_joints=32):
    """Swap Y/Z, mirror the new Z axis and re-anchor the pose, in place.

    Args:
        poses3d: 2D float array whose first ``n_joints * 3`` columns hold
            interleaved x, y, z values per joint.
        spine_x: 2D x coordinate of the spine joint; shifts x by
            ``spine_x - 630``.
        spine_y: 2D y coordinate of the spine joint; shifts z by
            ``500 - spine_y``.
        n_joints: number of joints stored per row.

    Returns:
        The same ``poses3d`` array, modified in place.
    """
    last = n_joints * 3
    xs = poses3d[:, 0:last:3]
    ys = poses3d[:, 1:last:3]
    zs = poses3d[:, 2:last:3]
    if zs.size == 0:
        return poses3d

    # Swap the y and z columns (the slices are views into poses3d).
    old_ys = ys.copy()
    ys[...] = zs
    zs[...] = old_ys

    # Mirror z inside its own range; the bounds are clamped to 0 / 10000 as
    # the running max / min started from those values.
    z_max = max(0, zs.max())
    z_min = min(10000, zs.min())
    zs[...] = z_max - zs + z_min

    xs += (spine_x - 630)
    zs += (500 - spine_y)
    return poses3d


def main(_):
    """Lift OpenPose 2D keypoints to 3D poses and export plots and JSON.

    Steps, in order:
      1. Read the smoothed 2D keypoints and save a plot of their curves.
      2. If ``FLAGS.interpolation`` is set, resample the curves with
         smoothing splines and save a second plot.
      3. Load the dataset normalization statistics and the model.
      4. For every frame: map the keypoints into the 64-value encoder
         input, run the model, un-normalize and re-orient the 3D pose,
         render it to ``png/pose_frame_<frame>.png`` and collect the
         export data.
      5. Optionally assemble ``gif_output/animation.gif``.
      6. Write ``maya/3d_data.json`` and ``maya/2d_data.json`` relative to
         the parent of this file's directory.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    smoothed = read_openpose_json_mydata(json_file=FLAGS.json)
    plt.figure(2)
    smooth_curves_plot = show_anim_curves(smoothed, plt)
    pngName = 'gif_output/smooth_plot.png'
    smooth_curves_plot.savefig(pngName)
    logger.info('writing gif_output/smooth_plot.png')

    # Only every gif_frame_step-th rendered frame goes into the GIF when the
    # sequence was interpolated.
    gif_frame_step = 1
    if FLAGS.interpolation:
        logger.info("start interpolation")

        framerange = len(smoothed)
        smooth_resamp = 125
        multiplier = FLAGS.multiplier
        # Clamp to 1: a multiplier above 1 would otherwise give a step of 0,
        # which is not a valid slice/range step.
        gif_frame_step = max(1, int(1 / multiplier))

        interpolate_smoothed = _interpolate_keypoints(
            smoothed, multiplier, smooth_resamp)
        logger.info(
            "done interpolating. reshaping {0} frames, please wait!!".format(
                framerange))

        plt.figure(3)
        smoothed = interpolate_smoothed
        interpolate_curves_plot = show_anim_curves(smoothed, plt)
        pngName = 'gif_output/interpolate_{0}.png'.format(smooth_resamp)
        interpolate_curves_plot.savefig(pngName)
        # Log the file that was actually written.
        logger.info('writing {0}'.format(pngName))

    # Encoder input buffer: 64 values, addressed below as joint * 2 + axis.
    # It is carried across frames exactly as before (after each frame it
    # holds the un-normalized input of that frame).
    enc_in = np.zeros((1, 64))

    actions = data_utils.define_actions(FLAGS.action)

    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    # The datasets are loaded only for their normalization statistics and
    # dimension indices; the train/test sets themselves are not used here.
    train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
        actions, FLAGS.data_dir, rcams)
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    device_count = {"GPU": 1}
    png_lib = []
    export_units = {}
    twod_export_units = {}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        batch_size = 128
        model = create_model(sess, actions, batch_size)
        iter_range = len(smoothed)
        for frame, xy in smoothed.items():
            logger.info("calc frame {0}/{1}".format(frame, iter_range))
            # map list into np array
            _data = np.array([float(xy[o]) for o in range(36)])

            # 2D export: one {"translate": [x, y]} entry per keypoint pair.
            twod_export_units[frame] = {
                jnt_index: {"translate": [xy[o], xy[o + 1]]}
                for jnt_index, o in enumerate(range(0, len(xy), 2))
            }

            # mapping all body parts or 3d-pose-baseline format
            for src_joint, dst_joint in enumerate(order):
                for axis in range(2):
                    # create encoder input
                    enc_in[0][dst_joint * 2 + axis] = _data[src_joint * 2 + axis]
            for axis in range(2):
                # Hip
                enc_in[0][0 * 2 + axis] = (enc_in[0][1 * 2 + axis] +
                                           enc_in[0][6 * 2 + axis]) / 2
                # Neck/Nose
                enc_in[0][14 * 2 + axis] = (enc_in[0][15 * 2 + axis] +
                                            enc_in[0][12 * 2 + axis]) / 2
                # Thorax
                enc_in[0][13 * 2 + axis] = (2 * enc_in[0][12 * 2 + axis] -
                                            enc_in[0][14 * 2 + axis])

            # set spine (read before enc_in is reduced and normalized)
            spine_x = enc_in[0][24]
            spine_y = enc_in[0][25]

            # Keep only the dimensions the model uses and normalize them.
            enc_in = enc_in[:, dim_to_use_2d]
            mu = data_mean_2d[dim_to_use_2d]
            stddev = data_std_2d[dim_to_use_2d]
            enc_in = np.divide((enc_in - mu), stddev)

            # presumably the dropout keep probability - confirm against model.step
            dp = 1.0
            # Placeholder target; only the predicted poses are used below.
            # NOTE(review): the width 36 presumably has to match the model's
            # output size - confirm against create_model.
            dec_out = np.zeros((1, 36))
            _, _, poses3d = model.step(sess, enc_in, dec_out, dp, isTraining=False)

            enc_in = data_utils.unNormalizeData(enc_in, data_mean_2d, data_std_2d, dim_to_ignore_2d)
            poses3d = data_utils.unNormalizeData(poses3d, data_mean_3d, data_std_3d, dim_to_ignore_3d)
            gs1 = gridspec.GridSpec(1, 1)
            gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
            plt.axis('off')
            enc_in = np.vstack(enc_in)
            poses3d = np.vstack([poses3d])

            _reorient_pose(poses3d, spine_x, spine_y)

            # Plot 3d predictions
            ax = plt.subplot(gs1[0], projection='3d')
            ax.view_init(18, -70)

            logger.info("frame score {0}".format(np.min(poses3d)))
            to_export = poses3d.tolist()[0]
            logger.info("export {0}".format(to_export))

            # 3D export: one {"translate": [x, y, z]} entry per joint.
            export_units[frame] = {
                jnt_index: {"translate": to_export[o:o + 3]}
                for jnt_index, o in enumerate(range(0, len(to_export), 3))
            }

            viz.show3Dpose(poses3d, ax, lcolor="#9b59b6", rcolor="#2ecc71")

            pngName = 'png/pose_frame_{0}.png'.format(str(frame).zfill(12))
            plt.savefig(pngName)
            if FLAGS.write_gif:
                png_lib.append(imageio.imread(pngName))

    if FLAGS.write_gif:
        if FLAGS.interpolation:
            # take every frame on gif_fps * multiplier_inv
            png_lib = np.array(png_lib[::gif_frame_step])
        logger.info("creating Gif gif_output/animation.gif, please Wait!")
        imageio.mimsave('gif_output/animation.gif', png_lib, fps=FLAGS.gif_fps)

    base_dir = os.path.dirname(os.path.dirname(__file__))
    _out_file = os.path.join(base_dir, 'maya/3d_data.json')
    twod_out_file = os.path.join(base_dir, 'maya/2d_data.json')
    # Log only after the dump succeeded so the message is truthful.
    with open(_out_file, 'w') as outfile:
        json.dump(export_units, outfile)
    logger.info("exported maya json to {0}".format(_out_file))
    with open(twod_out_file, 'w') as outfile:
        json.dump(twod_export_units, outfile)
    logger.info("exported maya json to {0}".format(twod_out_file))

    logger.info("Done!")