def test_sizes_of_parsed_data(self):
    # Instantiate the humanoid environment.
    env = humanoid_CMU.stand()

    # Parse and convert the specified clip.
    converted = parse_amc.convert(_TEST_AMC_PATH,
                                  env.physics, env.control_timestep())

    self.assertEqual(converted.qpos.shape[0], 63)
    self.assertEqual(converted.qvel.shape[0], 62)
    self.assertEqual(converted.time.shape[0], converted.qpos.shape[1])
    self.assertEqual(converted.qpos.shape[1], converted.qvel.shape[1] + 1)

    # Parse and convert the same clip with half the control timestep.
    converted2 = parse_amc.convert(_TEST_AMC_PATH,
                                   env.physics, 0.5 * env.control_timestep())

    self.assertEqual(converted2.qpos.shape[0], 63)
    self.assertEqual(converted2.qvel.shape[0], 62)
    self.assertEqual(converted2.time.shape[0], converted2.qpos.shape[1])
    self.assertEqual(converted2.qpos.shape[1], converted2.qvel.shape[1] + 1)

    # Halving the timestep should double the number of parsed frames.
    self.assertEqual(converted.qpos.shape[1] * 2, converted2.qpos.shape[1])
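# --- Hedged sketch (not from the original source) ---
# The method above reads as an absltest TestCase method. Assuming the imports
# below and a placeholder _TEST_AMC_PATH, a harness along these lines would
# run it; the class name and clip path are assumptions.
from absl.testing import absltest
from dm_control.suite import humanoid_CMU
from dm_control.suite.utils import parse_amc

_TEST_AMC_PATH = 'path/to/test_clip.amc'  # Hypothetical placeholder path.

class ParseAmcTest(absltest.TestCase):
    pass  # test_sizes_of_parsed_data above would be defined in this class.

if __name__ == '__main__':
    absltest.main()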
def main(unused_argv):
    env = humanoid_CMU.stand()

    # Parse and convert the specified clip.
    converted = parse_amc.convert(FLAGS.filename,
                                  env.physics, env.control_timestep())

    max_frame = min(FLAGS.max_num_frames, converted.qpos.shape[1] - 1)

    width = 480
    height = 480
    video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8)

    # Render each parsed pose from two cameras, side by side.
    for i in range(max_frame):
        p_i = converted.qpos[:, i]
        with env.physics.reset_context():
            env.physics.data.qpos[:] = p_i
        video[i] = np.hstack([env.physics.render(height, width, camera_id=0),
                              env.physics.render(height, width, camera_id=1)])

    tic = time.time()
    for i in range(max_frame):
        if i == 0:
            img = plt.imshow(video[i])
        else:
            img.set_data(video[i])
        toc = time.time()
        clock_dt = toc - tic
        tic = time.time()
        # Real-time playback is not always possible as clock_dt > .03.
        plt.pause(max(0.01, 0.03 - clock_dt))  # Need min display time > 0.0.
        plt.draw()
    plt.waitforbuttonpress()
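# --- Hedged sketch (not from the original source) ---
# main() references FLAGS.filename and FLAGS.max_num_frames, which are not
# defined in this excerpt. Assuming absl flags, the wiring would look like
# this; the default of 90 frames is an assumption.
from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string('filename', None, 'Path to the .amc motion-capture clip.')
flags.DEFINE_integer('max_num_frames', 90,
                     'Maximum number of frames to render and play back.')

if __name__ == '__main__':
    flags.mark_flag_as_required('filename')
    app.run(main)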
def getNumFrames(filename):
    env = humanoid_CMU.stand()
    # Parse and convert the specified clip.
    converted = parse_amc.convert(filename, env.physics,
                                  env.control_timestep())
    frame_num = converted.qpos.shape[1] - 1
    return frame_num
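# Hypothetical usage (the clip path below is a placeholder, not a repo file):
if __name__ == '__main__':
    print('Frames in clip: %d' % getNumFrames('data/run.amc'))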
def train(args):
    from model.encoder import bi_direction_lstm
    from model.action_decoder import MlpPolicy
    from model.mlp_state_decoder import MlpPolicy_state

    U.make_session(num_cpu=1).__enter__()
    env = humanoid_CMU.stand()
    obs_space = env.physics.data.qpos
    ac_space = env.action_spec()

    def encoder(name):
        return bi_direction_lstm(name=name, obs_space=obs_space,
                                 batch_size=args.lstm_batch,
                                 time_steps=args.time_steps,
                                 LSTM_size=args.LSTM_size,
                                 laten_size=args.laten_size)

    def action_decorder(name):
        return MlpPolicy(name=name, obs_space=obs_space, ac_space=ac_space,
                         embedding_shape=args.laten_size,
                         hid_size=args.pol_hid_size,
                         num_hid_layers=args.pol_layers)

    def state_decorder(name):
        return MlpPolicy_state(name=name, obs_space=obs_space,
                               embedding_shape=args.laten_size,
                               hid_size=args.state_de_hid_size,
                               num_hid_layers=args.state_de_hid_num)

    state_dataset = load_state_dataset(args.state_dir_path, env,
                                       args.control_timestep)

    learn(encoder=encoder, action_decorder=action_decorder,
          state_decorder=state_decorder, embedding_shape=args.laten_size,
          dataset=state_dataset, logdir=args.logdir,
          batch_size=args.lstm_batch, time_steps=args.time_steps,
          epsilon=args.epsilon, lr_rate=args.lr_rate)
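# --- Hedged sketch (not from the original source) ---
# train() consumes an `args` namespace; an argparse builder along these lines
# would supply every attribute read above. All defaults are placeholder
# assumptions; only the attribute names come from the function body.
import argparse

def argsparser():
    parser = argparse.ArgumentParser('Train the LSTM encoder / MLP decoders.')
    parser.add_argument('--state_dir_path', type=str, default='data/states')
    parser.add_argument('--logdir', type=str, default='log')
    parser.add_argument('--control_timestep', type=float, default=0.02)
    parser.add_argument('--lstm_batch', type=int, default=16)
    parser.add_argument('--time_steps', type=int, default=50)
    parser.add_argument('--LSTM_size', type=int, default=128)
    parser.add_argument('--laten_size', type=int, default=32)
    parser.add_argument('--pol_hid_size', type=int, default=100)
    parser.add_argument('--pol_layers', type=int, default=2)
    parser.add_argument('--state_de_hid_size', type=int, default=100)
    parser.add_argument('--state_de_hid_num', type=int, default=2)
    parser.add_argument('--epsilon', type=float, default=1e-8)
    parser.add_argument('--lr_rate', type=float, default=1e-4)
    return parser.parse_args()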
def train(args):
    from model.encoder import bi_direction_lstm
    from model.action_decoder import MlpPolicy
    from model.WaveNet import WaveNetModel

    U.make_session(num_cpu=1).__enter__()
    env = humanoid_CMU.stand()
    obs_space = env.physics.data.qpos
    ac_space = env.action_spec()

    def encoder(name):
        return bi_direction_lstm(name=name, obs_space=obs_space,
                                 batch_size=args.lstm_batch,
                                 time_steps=args.time_steps,
                                 LSTM_size=args.LSTM_size,
                                 laten_size=args.laten_size)

    def action_decorder(name):
        return MlpPolicy(name=name, obs_space=obs_space, ac_space=ac_space,
                         embedding_shape=args.laten_size,
                         hid_size=args.pol_hid_size,
                         num_hid_layers=args.pol_layers)

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    def state_decorder(name):  # The state decoder also needs a name.
        return WaveNetModel(
            name=name,
            obs_shape=obs_space,
            embedding_shape=args.laten_size,
            batch_size=args.time_steps,
            dilations=wavenet_params["dilations"],
            filter_width=wavenet_params["filter_width"],
            residual_channels=wavenet_params["residual_channels"],
            dilation_channels=wavenet_params["dilation_channels"],
            skip_channels=wavenet_params["skip_channels"],
            quantization_channels=wavenet_params["quantization_channels"],
            use_biases=wavenet_params["use_biases"],
            scalar_input=wavenet_params["scalar_input"],
            initial_filter_width=wavenet_params["initial_filter_width"],
            histograms=args.histograms,
            global_condition_channels=args.gc_channels)

    # The dataset may be a bit small; consider adding more walking clips.
    state_dataset = load_state_dataset(args.state_dir_path, env,
                                       args.control_timestep)

    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate, momentum=args.momentum)

    learn(env=env, encoder=encoder, action_decorder=action_decorder,
          state_decorder=state_decorder, embedding_shape=args.laten_size,
          dataset=state_dataset, optimizer=optimizer, logdir=args.logdir,
          batch_size=args.lstm_batch, time_steps=args.time_steps)
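# --- Hedged sketch (not from the original source) ---
# Shape of the JSON file `args.wavenet_params` is expected to point at,
# inferred only from the keys read above; every value is a placeholder.
EXAMPLE_WAVENET_PARAMS = {
    "dilations": [1, 2, 4, 8, 16, 32, 64, 128, 256],
    "filter_width": 2,
    "residual_channels": 32,
    "dilation_channels": 32,
    "skip_channels": 512,
    "quantization_channels": 256,
    "use_biases": True,
    "scalar_input": False,
    "initial_filter_width": 32,
}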
def parsedata(filename, sim):
    output = dict()
    file = open(filename, 'r')
    lines = file.read().split('\n')
    pos_list = dict()  # dict(joint_id, [list of positions for all frames])
    frame_count = 0
    num_joints = 0
    init_pos = []
    j = 1
    # Split by joints: each non-empty line holds one joint's track.
    for joints in lines:
        if joints != '':
            num_joints += 1
            # Split by frames.
            frames = joints.split(',')
            if j not in pos_list:
                pos_list[j] = []
            # Add the position of each joint per frame.
            for f in frames:
                positions = f.split(' ')
                positions = np.array([float(positions[0]),
                                      float(positions[1]),
                                      float(positions[2])])
                pos_list[j].append(positions)
                if j == 1:
                    frame_count += 1
                # if j == 11:
                #     print(positions)
            j += 1

    # Build initial joint positions (for frame 0).
    for i in range(1, num_joints + 1):
        init_pos.append(pos_list[i][0])

    env = humanoid_CMU.stand()
    max_frame = frame_count
    width = 480
    height = 480
    video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8)

    # createjointcsv(output_filename)
    # createbodiescsv(os.path.basename(filename).split('.')[0] + "_bodies.csv")
    output_world = []
    output_com = []
    output_parent = []
    joints = dict()
    bodies = dict()
    file = open(filename + ".amc", "w")
    for i in range(int(max_frame)):
        file.write(str(i + 1) + '\n')
        output['lclavicle'] = "lclavicle 0 0"
        output['rclavicle'] = "rclavicle 0 0"
        output['thorax'] = "thorax 0 0 0"
        with env.physics.reset_context():
            for j in range(1, num_joints + 1):
                joint_names = JOINT_MAPPING[j]
                cur_pos = pos_list[j][i]
                joint_index = env.physics.model.name2id(joint_names[0],
                                                        'joint')
                body_index = env.physics.model.name2id(
                    JOINT_BODIES[joint_index - 1], 'body')
                body_pos = env.physics.data.xpos[body_index]
                body_rot = env.physics.data.xmat[body_index].reshape(3, 3)
                joint_pos = pos_list[j][i]
                body_name = env.physics.model.id2name(body_index, "body")
                output[body_name] = body_name
                if j == 1:
                    output["root"] = ("root " + str(pos_list[j][i][0]) + " " +
                                      str(pos_list[j][i][1] + 17) + " " +
                                      str(pos_list[j][i][2]))
                init_joint_vec = (init_pos[j - 1] - body_pos).dot(body_rot)
                # Convert positions to the body-frame reference.
                body_frame_pos_cur = np.matmul(
                    np.subtract(joint_pos, body_pos), body_rot)
                for name in joint_names:
                    if name[-2:] == 'rz':
                        # Angle in the body-frame x-y plane between the
                        # initial and current joint vectors.
                        dot = min(1, (np.dot(
                            np.array([init_joint_vec[0], init_joint_vec[1]]),
                            np.array([body_frame_pos_cur[0],
                                      body_frame_pos_cur[1]])) /
                            (np.linalg.norm(np.array(
                                [init_joint_vec[0], init_joint_vec[1]])) *
                             np.linalg.norm(np.array(
                                 [body_frame_pos_cur[0],
                                  body_frame_pos_cur[1]])))))
                        angle = np.arccos(dot)
                        joint_index = env.physics.model.name2id(name, 'joint')
                        env.physics.data.qpos[joint_index + 6] = (
                            np.radians(INIT_QPOS[joint_index - 1]) + angle)
                        output[body_name] += " " + str(
                            np.degrees(angle) + INIT_QPOS[joint_index - 1])
                        if j == 1:
                            output["root"] += " " + str(np.degrees(angle))
                    elif name[-2:] == 'rx':
                        # Same angle computation in the y-z plane.
                        dot = min(1, (np.dot(
                            np.array([init_joint_vec[1], init_joint_vec[2]]),
                            np.array([body_frame_pos_cur[1],
                                      body_frame_pos_cur[2]])) /
                            (np.linalg.norm(np.array(
                                [init_joint_vec[1], init_joint_vec[2]])) *
                             np.linalg.norm(np.array(
                                 [body_frame_pos_cur[1],
                                  body_frame_pos_cur[2]])))))
                        angle = np.arccos(dot)
                        joint_index = env.physics.model.name2id(name, 'joint')
                        env.physics.data.qpos[joint_index + 6] = (
                            np.radians(INIT_QPOS[joint_index - 1]) + angle)
                        output[body_name] += " " + str(np.degrees(angle))
                        if j == 1:
                            output["root"] += " " + str(
                                np.degrees(angle) + INIT_QPOS[joint_index - 1])
                    elif name[-2:] == 'ry':
                        # Same angle computation in the x-z plane.
                        dot = min(1, (np.dot(
                            np.array([init_joint_vec[0], init_joint_vec[2]]),
                            np.array([body_frame_pos_cur[0],
                                      body_frame_pos_cur[2]])) /
                            (np.linalg.norm(np.array(
                                [init_joint_vec[0], init_joint_vec[2]])) *
                             np.linalg.norm(np.array(
                                 [body_frame_pos_cur[0],
                                  body_frame_pos_cur[2]])))))
                        angle = np.arccos(dot)
                        joint_index = env.physics.model.name2id(name, 'joint')
                        env.physics.data.qpos[joint_index + 6] = (
                            np.radians(INIT_QPOS[joint_index - 1]) + angle)
                        output[body_name] += " " + str(
                            np.degrees(angle) + INIT_QPOS[joint_index - 1])
                        if j == 1:
                            output["root"] += " " + str(np.degrees(angle))
        for name in AMC_ORDER:
            file.write(output[name] + '\n')
        env.physics.step()
        print("Processing frame " + str(i))
        # outputbodies(os.path.basename(filename).split('.')[0] +
        #              "_bodies.csv", env.physics, i)
        # bodies = calcBodyFrames(env.physics)
        # joints = outputjoints2csv(output_filename, i, env.physics,
        #                           bodies, joints)
        # joints, frame_data_world, frame_data_com, frame_data_parent = \
        #     buildFeatures(i, env.physics, bodies, joints)
        # output_world.append(frame_data_world)
        # output_com.append(frame_data_com)
        # output_parent.append(frame_data_parent)
        # outputcppcsv(os.path.basename(filename).split('.')[0] + "_cpp",
        #              env.physics)
        video[i] = np.hstack([env.physics.render(height, width, camera_id=0),
                              env.physics.render(height, width, camera_id=1)])

    # outputstate2csv(filename + "_state.csv", converted)
    # writeOutput(file_prefix + "_features_world.txt", output_world)
    # writeOutput(file_prefix + "_features_com.txt", output_com)
    # writeOutput(file_prefix + "_features_parent.txt", output_parent)
    file.close()

    if sim:
        # visualizeJoint(49, joints)
        plt.figure(2)
        tic = time.time()
        for i in range(max_frame):
            if i == 0:
                img = plt.imshow(video[i])
            else:
                img.set_data(video[i])
            toc = time.time()
            clock_dt = toc - tic
            tic = time.time()
            # Real-time playback is not always possible as clock_dt > .03.
            plt.pause(np.maximum(0.01, .03 - clock_dt))  # Need min display time > 0.0.
            plt.draw()
        plt.waitforbuttonpress()
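# Hypothetical usage (the capture path is a placeholder): converts a
# comma/space-delimited joint-position dump into '<input>.amc' and plays back
# the reconstruction when sim is True.
if __name__ == '__main__':
    parsedata('capture/session01.txt', sim=True)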
def main(args):
    from ppo1 import mlp_policy  # for the policy
    from model.encoder import bi_direction_lstm
    from dm_control.suite import humanoid_CMU

    U.make_session(num_cpu=args.num_cpu).__enter__()
    set_global_seeds(args.seed)
    env = humanoid_CMU.stand()
    obs_space = env.physics.data.qpos
    ac_space = env.action_spec()

    def policy_fn(name, ob_space, ac_space, reuse=False):
        # MLP policy. Should we reuse a previously trained policy? No.
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space,
                                    ac_space=ac_space, reuse=reuse,
                                    hid_size=[300, 200, 100],
                                    num_hid_layers=3)

    def encoder(name):
        return bi_direction_lstm(name=name, obs_space=obs_space,
                                 batch_size=args.lstm_batch,
                                 time_steps=args.time_steps,
                                 LSTM_size=args.LSTM_size,
                                 laten_size=args.laten_size)

    lstm_encoder = encoder("lstm_encoder")
    saver = lstm_encoder.get_trainable_variables()
    # Load the pretrained encoder parameters.
    load(saver=saver, sess=tf.get_default_session(),
         logdir=args.encoder_load_path)

    # env = bench.Monitor(env, logger.get_dir() and
    #                     osp.join(logger.get_dir(), "monitor.json"))
    # env.seed(args.seed)
    # gym.logger.setLevel(logging.WARN)
    # task_name = get_task_name(args)
    task_name = "Humanoid-CMU"
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    # dataset = Mujoco_Dset(expert_path=args.expert_path,
    #                       ret_threshold=args.ret_threshold,
    #                       traj_limitation=args.traj_limitation)

    # ========= Sample trajectory τ_j from the demonstration =========
    # Acts as the expert dataset; only observations are needed.
    from model.VAE import load_state_dataset
    dataset = load_state_dataset(data_dir_path=args.expert_data_dir, env=env,
                                 control_timestep=args.control_timestep)

    pretrained_weight = None
    if (args.pretrained and args.task == 'train') or args.algo == 'bc':
        # Pretrain with behavior cloning.
        from gail import behavior_clone
        if args.algo == 'bc' and args.task == 'evaluate':
            behavior_clone.evaluate(env, policy_fn, args.load_model_path,
                                    stochastic_policy=args.stochastic_policy)
            sys.exit()
        pretrained_weight = behavior_clone.learn(
            env, policy_fn, dataset, max_iters=args.BC_max_iter,
            pretrained=args.pretrained, ckpt_dir=args.checkpoint_dir,
            log_dir=args.log_dir, task_name=task_name)
        if args.algo == 'bc':
            sys.exit()

    from network.adversary import TransitionClassifier
    # Discriminator. The embedding_z conditioning is not handled yet.
    discriminator = TransitionClassifier(
        env, args.adversary_hidden_size,
        hidden_layers=args.adversary_hidden_layers,
        lr_rate=args.adversary_learning_rate,
        entcoeff=args.adversary_entcoeff,
        embedding_shape=args.laten_size)
    # !!! This part is still a bit messy.
    observations = dataset.get_next_batch(batch_size=128)[0].transpose((1, 0))
    embedding_z = lstm_encoder.get_laten_vector(observations)

    if args.algo == 'trpo':
        # Set up the MPI seed.
        from mpi4py import MPI
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            logger.set_level(logger.DISABLED)
        workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        from gail import trpo_mpi
        if args.task == 'train':
            trpo_mpi.learn(env, policy_fn, discriminator, dataset,
                           embedding_z=None,  # How to pass embedding_z here is still undecided.
                           pretrained=args.pretrained,
                           pretrained_weight=pretrained_weight,
                           g_step=args.g_step, d_step=args.d_step,
                           timesteps_per_batch=1024, max_kl=args.max_kl,
                           cg_iters=10, cg_damping=0.1,
                           max_timesteps=args.num_timesteps,
                           entcoeff=args.policy_entcoeff,
                           gamma=0.995, lam=0.97,
                           vf_iters=5, vf_stepsize=1e-3,
                           ckpt_dir=args.checkpoint_dir,
                           log_dir=args.log_dir,
                           save_per_iter=args.save_per_iter,
                           load_model_path=args.load_model_path,
                           task_name=task_name)
        elif args.task == 'evaluate':
            trpo_mpi.evaluate(env, policy_fn, args.load_model_path,
                              timesteps_per_batch=1024, number_trajs=10,
                              stochastic_policy=args.stochastic_policy)
        else:
            raise NotImplementedError
    elif args.algo == 'ppo':
        # Set up the MPI seed.
        from mpi4py import MPI
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            logger.set_level(logger.DISABLED)
        workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        from gail import ppo_mpi
        if args.task == 'train':
            ppo_mpi.learn(env, policy_fn, discriminator, dataset,
                          # pretrained=args.pretrained,
                          pretrained_weight=pretrained_weight,
                          timesteps_per_batch=1024,
                          g_step=args.g_step, d_step=args.d_step,
                          # max_kl=args.max_kl, cg_iters=10, cg_damping=0.1,
                          clip_param=0.2, entcoeff=args.policy_entcoeff,
                          max_timesteps=args.num_timesteps,
                          gamma=0.99, lam=0.95,
                          # vf_iters=5, vf_stepsize=1e-3,
                          optim_epochs=10, optim_stepsize=3e-4,
                          optim_batchsize=64, d_stepsize=3e-4,
                          schedule='linear', ckpt_dir=args.checkpoint_dir,
                          save_per_iter=100, task=args.task,
                          sample_stochastic=args.stochastic_policy,
                          load_model_path=args.load_model_path,
                          task_name=task_name)
        elif args.task == 'evaluate':
            ppo_mpi.evaluate(env, policy_fn, args.load_model_path,
                             timesteps_per_batch=1024, number_trajs=10,
                             stochastic_policy=args.stochastic_policy)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    env.close()
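# --- Hedged sketch (not from the original source) ---
# A plausible entry point; `argsparser` is assumed to be the helper that
# defines the many `args.*` attributes read in main().
if __name__ == '__main__':
    args = argsparser()
    main(args)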
def parseData(filename, sim=True, maxframe=1000):
    file = np.load(filename)

    # Set up the MuJoCo simulation.
    env = humanoid_CMU.stand()
    num_frame = len(file)
    max_frame = np.minimum(maxframe, num_frame)
    width = 480
    height = 480
    if sim:
        video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8)

    joint_positions = []
    rtibia_body_index = env.physics.model.name2id('rtibia', 'body')
    rfoot_body_index = env.physics.model.name2id('rfoot', 'body')
    ltibia_body_index = env.physics.model.name2id('ltibia', 'body')
    lfoot_body_index = env.physics.model.name2id('lfoot', 'body')

    for i in tqdm(range(int(max_frame))):
        with env.physics.reset_context():
            for j in range(len(file[i])):
                joint_name = JOINT_MAPPING[j]
                joint_index = env.physics.model.name2id(joint_name, 'joint')
                if j == 0 or j == 4:
                    env.physics.data.qpos[joint_index + 6] = (
                        -np.radians(file[i][j]) + np.radians(INIT_QPOS[j]))
                else:
                    env.physics.data.qpos[joint_index + 6] = (
                        np.radians(file[i][j]) + np.radians(INIT_QPOS[j]))
        env.physics.step()

        # Output body/joint positions.
        pos = [env.physics.data.xpos[lfoot_body_index],
               env.physics.data.xpos[rfoot_body_index],
               env.physics.data.xpos[ltibia_body_index],
               env.physics.data.xpos[rtibia_body_index]]
        joint_positions.append(copy.deepcopy(pos))

        if sim:
            video[i] = np.hstack(
                [env.physics.render(height, width, camera_id=0),
                 env.physics.render(height, width, camera_id=1)])

    outputPositions(filename, joint_positions)

    if sim:
        plt.figure(2)
        tic = time.time()
        for i in range(max_frame):
            if i == 0:
                img = plt.imshow(video[i])
            else:
                img.set_data(video[i])
            toc = time.time()
            clock_dt = toc - tic
            tic = time.time()
            # Real-time playback is not always possible as clock_dt > .03.
            plt.pause(np.maximum(0.01, .03 - clock_dt))  # Need min display time > 0.0.
            plt.draw()
        plt.waitforbuttonpress()
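# Hypothetical usage (the .npy path is a placeholder): replays a saved
# (num_frames, num_joints) array of joint angles given in degrees.
if __name__ == '__main__':
    parseData('capture/joint_angles.npy', sim=True, maxframe=500)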
def parsedata(filename, sim):
    file = open(filename, 'r')
    lines = file.read().split('\n')
    pos_list = []  # [[list of joints for the frame]]
    num_frames = int(lines[0])
    skip_lines = 0
    for i in range(num_frames):
        num_bodies = lines[1 + skip_lines]
        dont_care = lines[2 + skip_lines]
        num_joints = int(lines[3 + skip_lines])
        frame_info = []
        for j in range(num_joints):
            joint_info = lines[j + 4 + skip_lines].split(" ")
            pos_x = float(joint_info[0])
            pos_y = float(joint_info[1])
            pos_z = float(joint_info[2])
            ori_w = float(joint_info[7])
            ori_x = float(joint_info[8])
            ori_y = float(joint_info[9])
            ori_z = float(joint_info[10])
            frame_info.append([pos_x, pos_y, pos_z,
                               ori_w, ori_x, ori_y, ori_z])
        skip_lines += (3 + num_joints)
        pos_list.append(frame_info)

    env = humanoid_CMU.stand()
    max_frame = num_frames
    width = 480
    height = 480
    video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8)
    joints = dict()

    for i in range(int(max_frame)):
        with env.physics.reset_context():
            for j in range(len(pos_list[i])):
                joint_names = JOINT_MAPPING[j + 1]
                joint_index = env.physics.model.name2id(joint_names[0],
                                                        'joint')
                joint_quat = [pos_list[i][j][3], pos_list[i][j][4],
                              pos_list[i][j][5], pos_list[i][j][6]]
                angles = transforms3d.euler.quat2euler(joint_quat)
                # parent_quat = [pos_list[i][0][3], pos_list[i][0][4],
                #                pos_list[i][0][5], pos_list[i][0][6]]
                parent_quat = [pos_list[i][PARENT_JOINT_MAPPING[j] - 1][3],
                               pos_list[i][PARENT_JOINT_MAPPING[j] - 1][4],
                               pos_list[i][PARENT_JOINT_MAPPING[j] - 1][5],
                               pos_list[i][PARENT_JOINT_MAPPING[j] - 1][6]]
                parent_angles = transforms3d.euler.quat2euler(parent_quat)
                relative_angles = np.subtract(angles, parent_angles)
                if j in range(12, 14) or j in range(16, 18):
                    print(np.degrees(angles))
                    print(np.degrees(relative_angles))
                for name in joint_names:
                    # if isLeft(j):
                    if name[-2:] == 'rz':
                        joint_index = env.physics.model.name2id(name, 'joint')
                        env.physics.data.qpos[joint_index + 6] = \
                            relative_angles[0]
                    elif name[-2:] == 'rx':
                        joint_index = env.physics.model.name2id(name, 'joint')
                        env.physics.data.qpos[joint_index + 6] = \
                            relative_angles[1]
                    elif name[-2:] == 'ry':
                        joint_index = env.physics.model.name2id(name, 'joint')
                        env.physics.data.qpos[joint_index + 6] = \
                            relative_angles[2]
                    # else:
                    #     if name[-2:] == 'rz':
                    #         joint_index = env.physics.model.name2id(name, 'joint')
                    #         env.physics.data.qpos[joint_index + 6] = -relative_angles[1]
                    #     elif name[-2:] == 'rx':
                    #         joint_index = env.physics.model.name2id(name, 'joint')
                    #         env.physics.data.qpos[joint_index + 6] = -relative_angles[2]
                    #     elif name[-2:] == 'ry':
                    #         joint_index = env.physics.model.name2id(name, 'joint')
                    #         env.physics.data.qpos[joint_index + 6] = relative_angles[0]
        env.physics.step()
        print("Processing frame " + str(i))
        video[i] = np.hstack([env.physics.render(height, width, camera_id=0),
                              env.physics.render(height, width, camera_id=1)])

    file.close()

    plt.figure(2)
    tic = time.time()
    for i in range(max_frame):
        if i == 0:
            img = plt.imshow(video[i])
        else:
            img.set_data(video[i])
        toc = time.time()
        clock_dt = toc - tic
        tic = time.time()
        # Real-time playback is not always possible as clock_dt > .03.
        plt.pause(np.maximum(0.01, .03 - clock_dt))  # Need min display time > 0.0.
        plt.draw()
    plt.waitforbuttonpress()
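# --- Hedged sketch (not from the original source) ---
# Module-level lookup tables the parser above assumes. The entries below are
# illustrative assumptions; only the types and access patterns are implied by
# the code: JOINT_MAPPING maps a capture-joint id to a tuple of humanoid_CMU
# joint names, PARENT_JOINT_MAPPING maps a joint id to its parent's id, and
# INIT_QPOS holds per-joint reference angles in degrees.
JOINT_MAPPING = {1: ('lfemurrz', 'lfemurry', 'lfemurrx')}  # ... and so on.
PARENT_JOINT_MAPPING = {1: 1, 2: 1}                        # ... and so on.
INIT_QPOS = [0.0] * 56                                     # One per joint.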