def __init__(self, env_name='walk'):
    """Set up a motion-tracking environment for the 'woody' skeleton.

    # Arguments
        env_name (str): selects which reference BVH clip is tracked.
    """
    # Dynamic world at 1200 Hz; skeleton index 1 is the character
    # (index 0 is presumably the ground -- TODO confirm).
    self.world = pydart.World(1. / 1200., "../data/woody_with_ground_v2.xml")
    self.world.control_skel = self.world.skeletons[1]
    self.skel = self.world.skeletons[1]
    # PD tracking gains.
    self.Kp, self.Kd = 400., 40.
    self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
    self.env_name = env_name

    # Reference clip, chosen by env_name; slices trim lead-in/lead-out frames.
    self.ref_motion = None  # type: ym.Motion
    if env_name == 'walk':
        self.ref_motion = yf.readBvhFile(
            "../data/woody_walk_normal.bvh")[40:]
    elif env_name == 'spiral_walk':
        self.ref_motion = yf.readBvhFile(
            "../data/wd2_spiral_walk_normal05.bvh")
    elif env_name == 'walk_spin':
        self.ref_motion = yf.readBvhFile(
            "../data/wd2_2foot_walk_turn2.bvh")
    elif env_name == 'jump':
        self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")[164:280]
    elif env_name == 'walk_fast':
        self.ref_motion = yf.readBvhFile(
            "../data/wd2_WalkForwardVFast00.bvh")
    elif env_name == 'walk_left_90':
        self.ref_motion = yf.readBvhFile("../data/walk_left_90degree.bvh")
    elif env_name == 'walk_left_45':
        self.ref_motion = yf.readBvhFile("../data/walk_left_45degree.bvh")
    elif env_name == 'walk_pick':
        self.ref_motion = yf.readBvhFile("../data/wd2_pick_walk_1.bvh")
    elif env_name == 'walk_u_turn':
        self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214]
        self.ref_motion.translateByOffset([0., 0.03, 0.])
    elif env_name == 'jump_whole':
        self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")[315:966]
    elif env_name == 'walk_u_turn_whole':
        self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")
        self.ref_motion.translateByOffset([0., 0.03, 0.])

    # Separate kinematic world used only to pose the reference skeleton.
    self.ref_world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
    self.ref_skel = self.ref_world.skeletons[1]
    # self.step_per_frame = round((1./self.world.time_step()) / self.ref_motion.fps)
    # NOTE(review): hard-coded 40 replaces the commented computation above;
    # correct only for a 30 fps clip at a 1200 Hz step -- confirm per clip.
    self.step_per_frame = 40

    self.rsi = True  # reference-state initialization on reset

    # Reward weights (pose, velocity, end-effector, COM) and the
    # exponent scales of the corresponding exp() reward terms.
    self.w_p = 0.65
    self.w_v = 0.1
    self.w_e = 0.15
    self.w_c = 0.1
    self.exp_p = 2.
    self.exp_v = 0.1
    self.exp_e = 40.
    self.exp_c = 10.

    self.body_num = self.skel.num_bodynodes()
    # End-effector bodies tracked by the reward: both feet, both forearms.
    self.idx_e = [
        self.skel.bodynode_index('LeftFoot'),
        self.skel.bodynode_index('RightFoot'),
        self.skel.bodynode_index('LeftForeArm'),
        self.skel.bodynode_index('RightForeArm')
    ]
    self.body_e = list(map(self.skel.body, self.idx_e))
    self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

    self.motion_len = len(self.ref_motion)
    self.motion_time = len(self.ref_motion) / self.ref_motion.fps
    self.time_offset = 0.

    # Observation: 1 phase scalar + (3 vectors of 3 + quaternion) per body.
    state_num = 1 + (3 * 3 + 4) * self.body_num
    # Actions exclude the 6 unactuated root dofs.
    action_num = self.skel.num_dofs() - 6
    state_high = np.array([np.finfo(np.float32).max] * state_num)
    action_high = np.array([pi * 10. / 2.] * action_num)
    self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
    self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

    self.viewer = None
    self.phase_frame = 0

    # max training time
    self.training_time = 3.
    self.evaluation = False
    self.visualize = False
    self.visualize_select_motion = 0
class HpDartEnv(gym.Env):
    """Goal-directed locomotion environment (CMU skeleton).

    An RNN motion generator (``RNNController``) produces the reference pose
    each control step, steered toward a randomly sampled 2D goal; the
    simulated skeleton tracks the reference with a PD controller.  The
    reward mixes pose/velocity/end-effector/COM tracking terms with a
    goal-distance term.

    Fixes vs. previous revision:
      * ``__init__`` initialized ``self.goal_prev`` while ``reward()`` and
        ``update_goal_in_local_frame()`` use ``self.prev_goal`` -- the typo'd
        name is replaced so ``prev_goal`` exists from construction.
      * ``get_rnn_ref_pose_step()`` stored ``self.prev_ref_p_e_hat`` while
        ``reward()`` reads ``self.prev_ref_p_e``, so the end-effector reward
        term compared against a stale, construction-time value; both now use
        ``prev_ref_p_e``.
    """

    def __init__(self, env_name='walk'):
        self.skel_file_name = '../data/cmu_with_ground.xml'
        # Dynamic world at 1200 Hz; skeleton index 1 is the character.
        self.world = pydart.World(1. / 1200., self.skel_file_name)
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        self.Kp, self.Kd = 400., 40.
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
        self.env_name = env_name

        # Kinematic world holding the RNN-generated reference pose.
        self.ref_world = pydart.World(1. / 1200., self.skel_file_name)
        self.ref_skel = self.ref_world.skeletons[1]
        self.step_per_frame = 40

        # RNN reference-motion generator; its coordinates are scaled by
        # RNN_MOTION_SCALE relative to the simulation's units, and its
        # output arrays are indexed by position in rnn_joint_list.
        self.rnn = RNNController(env_name)
        self.RNN_MOTION_SCALE = 0.01
        self.rnn_joint_list = [
            "Head", "Hips", "LHipJoint", "LeftArm", "LeftFoot", "LeftForeArm",
            "LeftHand", "LeftLeg", "LeftShoulder", "LeftToeBase", "LeftUpLeg",
            "LowerBack", "Neck", "Neck1", "RHipJoint", "RightArm", "RightFoot",
            "RightForeArm", "RightHand", "RightLeg", "RightShoulder",
            "RightToeBase", "RightUpLeg", "Spine", "Spine1"
        ]
        # Per-step probability of resampling the goal position.
        self.rnn_target_update_prob = 1. / 150.
        self.ik = DartIk(self.ref_skel)

        self.rsi = True

        # Reward weights (goal, pose, velocity, end-effector, COM) and the
        # exponent scales of the corresponding exp() reward terms.
        self.w_g = 0.3
        self.w_p = 0.65 * .7
        self.w_v = 0.1 * .7
        self.w_e = 0.15 * .7
        self.w_c = 0.1 * .7
        self.exp_g = 2.5
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # End-effector bodies tracked by the reward: both feet, both hands.
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftHand'),
            self.skel.bodynode_index('RightHand')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

        self.time_offset = 0.
        self.step_num = 0

        # Observation: 2 goal scalars + positions/velocities per body
        # + joint angles/velocities/reference angles (root translation excluded).
        state_num = 2 + 2 * 3 * self.body_num + 3 * (self.skel.num_dofs() - 3)
        # Actions exclude the 6 unactuated root dofs.
        action_num = self.skel.num_dofs() - 6
        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)
        self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

        self.viewer = None
        self.phase_frame = 0

        # Goal in the root's local frame (2D) and in world coordinates (3D).
        self.goal = np.zeros(2)
        # FIX: was 'self.goal_prev', which nothing ever read; reward() uses
        # prev_goal, so initialize it here to avoid AttributeError before
        # the first update_goal_in_local_frame() call.
        self.prev_goal = self.goal.copy()
        self.goal_in_world_frame = np.zeros(3)
        self.first = True

        # Previous reference pose snapshot used by reward().
        self.prev_ref_q = self.ref_skel.positions()
        self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_p_e = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        self.prev_ref_com = self.ref_skel.com()

    def state(self):
        """Observation vector: scaled goal, root-relative body positions and
        velocities, joint angles/velocities, and reference joint angles."""
        pelvis = self.skel.body(0)
        p_pelvis = pelvis.world_transform()[:3, 3]
        R_pelvis = pelvis.world_transform()[:3, :3]

        state = [self.goal[0] / 5., self.goal[1] / 5.]

        # Body positions/linear velocities expressed in the pelvis frame,
        # with empirical scaling factors.
        p = 0.8 * np.array([
            np.dot(R_pelvis.T, body.to_world() - p_pelvis)
            for body in self.skel.bodynodes
        ]).flatten()
        v = 0.2 * np.array([
            np.dot(R_pelvis.T, body.world_linear_velocity())
            for body in self.skel.bodynodes
        ]).flatten()

        # Joint-space terms; dofs 3:6 (root translation) are dropped.
        _R = 2. * self.skel.positions()
        _w = .2 * self.skel.velocities()
        R = np.append(_R[:3], _R[6:]).flatten()
        w = np.append(_w[:3], _w[6:]).flatten()
        _ref_R = 2. * self.ref_skel.positions()
        ref_R = np.append(_ref_R[:3], _ref_R[6:]).flatten()

        state.extend(p)
        state.extend(R)
        state.extend(v)
        state.extend(w)
        state.extend(ref_R)
        return np.asarray(state).flatten()

    def reward(self):
        """Tracking reward against the previous reference pose plus a
        goal-proximity term (prev_goal is the goal in the root frame)."""
        p_e = np.asarray(
            [body.world_transform()[:3, 3] for body in self.body_e]).flatten()
        return exp_reward_term(self.w_p, self.exp_p, self.skel.position_differences(self.prev_ref_q, self.skel.q)) \
            + exp_reward_term(self.w_v, self.exp_v, self.skel.velocity_differences(self.prev_ref_dq, self.skel.dq)) \
            + exp_reward_term(self.w_e, self.exp_e, p_e - self.prev_ref_p_e) \
            + exp_reward_term(self.w_c, self.exp_c, self.skel.com() - self.prev_ref_com) \
            + exp_reward_term(self.w_g, self.exp_g, self.prev_goal)

    def is_done(self):
        """Episode ends on fall/fly-away, NaN state, or 10 s of sim time."""
        if self.skel.com()[1] < 0.4 or self.skel.com()[1] > 3.0:
            return True
        elif True in np.isnan(np.asarray(self.skel.q)) or True in np.isnan(
                np.asarray(self.skel.dq)):
            return True
        elif self.world.time() > 10.:
            return True
        return False

    def step(self, _action):
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done, info).

        # Arguments
            action (object): An action provided by the environment.

        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float): Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further
                step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information.
        """
        # Action is a per-dof offset on the reference pose; root gets zeros.
        action = np.hstack((np.zeros(6), _action / 10.))
        for i in range(self.step_per_frame):
            self.skel.set_forces(
                self.pdc.compute_flat(self.ref_skel.q + action))
            self.world.step()

        # Occasionally resample the goal, refresh its root-frame coordinates,
        # then advance the RNN reference one frame.
        if random() < self.rnn_target_update_prob:
            self.sample_target()
        self.update_goal_in_local_frame(False)
        self.get_rnn_ref_pose_step(False)

        return tuple([self.state(), self.reward(), self.is_done(), dict()])

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        # Returns
            observation (object): The initial observation of the space.
                Initial reward is assumed to be 0.
        """
        self.world.reset()
        if self.first:
            self.sample_target()
        self.get_rnn_ref_pose_step(True)
        # Snap the simulated skeleton onto the reference pose.
        self.skel.set_positions(self.ref_skel.positions())
        self.skel.set_velocities(self.ref_skel.velocities())
        self.first = False
        return self.state()

    def sample_target(self):
        """Sample a new goal on the ground plane within 5 m of the root."""
        angle = 2. * pi * random()
        radius = 5. * random()
        self.goal_in_world_frame = self.skel.body(0).to_world() + radius * (
            cos(angle) * mm.unitX() - sin(angle) * mm.unitZ())
        self.goal_in_world_frame[1] = 0.

    def update_goal_in_local_frame(self, reset=False):
        """Recompute self.goal: the world goal projected into the root's
        horizontal-plane frame (x-axis / derived z-axis components)."""
        if not reset:
            self.prev_goal = self.goal.copy()

        body_transform = self.skel.body(0).world_transform()
        goal_vector_in_world_frame = self.goal_in_world_frame - body_transform[:3, 3]
        goal_vector_in_world_frame[1] = 0.
        radius = mm.length(goal_vector_in_world_frame)
        unit_goal_vector_in_world_frame = mm.normalize(goal_vector_in_world_frame)

        # Root x-axis flattened onto the ground plane; z-axis derived from it.
        root_x_in_world_plane = body_transform[:3, 0]
        root_x_in_world_plane[1] = 0.
        unit_root_x_in_world_plane = mm.seq2Vec3(
            mm.normalize(root_x_in_world_plane))
        unit_root_z_in_world_plane = mm.cross(unit_root_x_in_world_plane,
                                              mm.unitY())
        self.goal = radius * np.array([
            np.dot(unit_root_x_in_world_plane, unit_goal_vector_in_world_frame),
            np.dot(unit_root_z_in_world_plane, unit_goal_vector_in_world_frame)
        ])
        if reset:
            self.prev_goal = self.goal.copy()

    def get_rnn_ref_pose_step(self, reset=False):
        """Advance the RNN one frame and pose ref_skel accordingly.

        Snapshots the outgoing reference pose into prev_ref_* (so reward()
        compares the simulation against the pose it was tracking), feeds the
        goal to the RNN, applies the returned joint angles, and refines hand
        and foot placement with IK.
        """
        if not reset:
            # FIX: these were written to prev_ref_p_e_hat while reward()
            # reads prev_ref_p_e; use one consistent name.
            self.prev_ref_q = self.ref_skel.positions()
            self.prev_ref_dq = self.ref_skel.velocities()
            self.prev_ref_p_e = np.asarray([
                body.world_transform()[:3, 3] for body in self.ref_body_e
            ]).flatten()
            self.prev_ref_com = self.ref_skel.com()

        # Goal expressed in the RNN's 2D coordinate system, clamped to
        # length 80 (RNN units).
        p = self.goal_in_world_frame
        target = Pose2d(
            [p[0] / self.RNN_MOTION_SCALE, -p[2] / self.RNN_MOTION_SCALE])
        target = self.rnn.pose.relativePose(target)
        target = target.p
        t_len = v_len(target)
        if t_len > 80:
            ratio = 80 / t_len
            target[0] *= ratio
            target[1] *= ratio

        contacts, points, angles, orientations, root_orientation = self.rnn.step(
            target)

        # Copy the RNN joint angles onto the reference skeleton.
        for j in range(len(self.ref_skel.joints)):
            if j == 0:
                # Root: orientation as a log-map rotation plus translation.
                joint = self.ref_skel.joints[j]  # type: pydart.FreeJoint
                joint_idx = self.rnn_joint_list.index(joint.name)
                hip_angles = mm.logSO3(
                    np.dot(root_orientation, orientations[joint_idx]))
                joint.set_position(
                    np.array([
                        hip_angles[0], hip_angles[1], hip_angles[2],
                        points[0][0], points[0][1], points[0][2]
                    ]))
                continue
            joint = self.ref_skel.joints[j]  # type: pydart.BallJoint
            joint_idx = self.rnn_joint_list.index(joint.name)
            joint.set_position(angles[joint_idx * 3:joint_idx * 3 + 3])

        # IK pass: pin elbows/hands/knees/feet to the RNN's point targets.
        self.ik.clean_constraints()
        self.ik.add_joint_pos_const('LeftForeArm', np.asarray(points[10]))
        self.ik.add_joint_pos_const('LeftHand', np.asarray(points[2]))
        self.ik.add_joint_pos_const('LeftLeg', np.asarray(points[11]))
        self.ik.add_joint_pos_const('LeftFoot', np.asarray(points[3]))
        # Deliberately disabled ('and False'): contact-based foot orientation
        # constraint kept for reference.
        if contacts[0] > 0.8 and False:
            body_transform = self.ref_skel.body('LeftFoot').transform()[:3, :3]
            angle = acos(body_transform[1, 1])
            body_ori = np.dot(body_transform, mm.rotX(-angle))
            self.ik.add_orientation_const('LeftFoot', body_ori)
        self.ik.add_joint_pos_const('RightForeArm', np.asarray(points[12]))
        self.ik.add_joint_pos_const('RightHand', np.asarray(points[5]))
        self.ik.add_joint_pos_const('RightLeg', np.asarray(points[13]))
        self.ik.add_joint_pos_const('RightFoot', np.asarray(points[6]))
        self.ik.solve()

        # Fixed post-IK offsets applied to the ankle joints.
        foot_joint_ori = mm.exp(self.ref_skel.joint('LeftFoot').position())
        self.ref_skel.joint('LeftFoot').set_position(
            mm.logSO3(np.dot(foot_joint_ori, np.dot(mm.rotX(-.6), mm.rotZ(.4)))))
        foot_joint_ori = mm.exp(self.ref_skel.joint('RightFoot').position())
        self.ref_skel.joint('RightFoot').set_position(
            mm.logSO3(
                np.dot(foot_joint_ori, np.dot(mm.rotX(-.6), mm.rotZ(-.4)))))

        # Finite-difference reference velocity (30 Hz frame rate).
        if not self.first:
            dq = 30. * self.ref_skel.position_differences(
                self.ref_skel.positions(), self.prev_ref_q)
            self.ref_skel.set_velocities(dq)

        if reset:
            self.prev_ref_q = self.ref_skel.positions()
            self.prev_ref_dq = self.ref_skel.velocities()
            self.prev_ref_p_e = np.asarray([
                body.world_transform()[:3, 3] for body in self.ref_body_e
            ]).flatten()
            self.prev_ref_com = self.ref_skel.com()

    def render(self, mode='human', close=False):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.)

        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        # Rendering is handled externally for this environment.
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    # --- Thin adapters for a multi-slave training interface. ---

    def GetState(self, param):
        return self.state()

    def GetStates(self):
        return [self.state()]

    def Resets(self, rsi):
        self.rsi = rsi
        self.reset()

    def Reset(self, rsi, idx):
        self.rsi = rsi
        self.reset()

    def Steps(self, actions):
        return self.step(actions[0])

    def IsTerminalState(self, j):
        return self.is_done()

    def GetReward(self, j):
        return self.reward()

    def IsTerminalStates(self):
        return [self.is_done()]
def __init__(self, env_name='walk'):
    """Set up the goal-directed RNN locomotion environment (CMU skeleton).

    # Arguments
        env_name (str): motion style passed through to RNNController.
    """
    self.skel_file_name = '../data/cmu_with_ground.xml'
    # Dynamic world at 1200 Hz; skeleton index 1 is the character.
    self.world = pydart.World(1. / 1200., self.skel_file_name)
    self.world.control_skel = self.world.skeletons[1]
    self.skel = self.world.skeletons[1]
    self.Kp, self.Kd = 400., 40.
    self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
    self.env_name = env_name

    # Kinematic world holding the RNN-generated reference pose.
    self.ref_world = pydart.World(1. / 1200., self.skel_file_name)
    self.ref_skel = self.ref_world.skeletons[1]
    self.step_per_frame = 40

    # RNN reference-motion generator; outputs indexed by rnn_joint_list.
    self.rnn = RNNController(env_name)
    self.RNN_MOTION_SCALE = 0.01
    self.rnn_joint_list = [
        "Head", "Hips", "LHipJoint", "LeftArm", "LeftFoot", "LeftForeArm",
        "LeftHand", "LeftLeg", "LeftShoulder", "LeftToeBase", "LeftUpLeg",
        "LowerBack", "Neck", "Neck1", "RHipJoint", "RightArm", "RightFoot",
        "RightForeArm", "RightHand", "RightLeg", "RightShoulder",
        "RightToeBase", "RightUpLeg", "Spine", "Spine1"
    ]
    # Per-step probability of resampling the goal position.
    self.rnn_target_update_prob = 1. / 150.
    self.ik = DartIk(self.ref_skel)

    self.rsi = True

    # Reward weights (goal, pose, velocity, end-effector, COM) and the
    # exponent scales of the corresponding exp() reward terms.
    self.w_g = 0.3
    self.w_p = 0.65 * .7
    self.w_v = 0.1 * .7
    self.w_e = 0.15 * .7
    self.w_c = 0.1 * .7
    self.exp_g = 2.5
    self.exp_p = 2.
    self.exp_v = 0.1
    self.exp_e = 40.
    self.exp_c = 10.

    self.body_num = self.skel.num_bodynodes()
    # End-effector bodies tracked by the reward: both feet, both hands.
    self.idx_e = [
        self.skel.bodynode_index('LeftFoot'),
        self.skel.bodynode_index('RightFoot'),
        self.skel.bodynode_index('LeftHand'),
        self.skel.bodynode_index('RightHand')
    ]
    self.body_e = list(map(self.skel.body, self.idx_e))
    self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

    self.time_offset = 0.
    self.step_num = 0

    # state_num = 2 + (3*3 + 4) * self.body_num
    state_num = 2 + 2 * 3 * self.body_num + 3 * (self.skel.num_dofs() - 3)
    # Actions exclude the 6 unactuated root dofs.
    action_num = self.skel.num_dofs() - 6
    state_high = np.array([np.finfo(np.float32).max] * state_num)
    action_high = np.array([pi * 10. / 2.] * action_num)
    self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
    self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

    self.viewer = None
    self.phase_frame = 0

    # Goal in the root's local frame (2D) and in world coordinates (3D).
    self.goal = np.zeros(2)
    # FIX: was 'self.goal_prev', a typo nothing else reads; the reward and
    # goal-update code use prev_goal, so initialize that name instead to
    # avoid AttributeError before the first goal update.
    self.prev_goal = self.goal.copy()
    self.goal_in_world_frame = np.zeros(3)
    self.first = True

    # Previous reference pose snapshot used by the reward.
    self.prev_ref_q = self.ref_skel.positions()
    self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
    self.prev_ref_p_e = np.asarray([
        body.world_transform()[:3, 3] for body in self.ref_body_e
    ]).flatten()
    self.prev_ref_com = self.ref_skel.com()
def __init__(self, rnn_len):
    """Set up a tracking environment whose reference is the CMU T-pose
    clip truncated to the first rnn_len frames.

    # Arguments
        rnn_len (int): number of reference frames to keep.
    """
    skel_file = '../data/cmu_with_ground.xml'
    # Dynamic world at 1200 Hz; skeleton index 1 is the character.
    self.world = pydart.World(1./1200., skel_file)
    self.world.control_skel = self.world.skeletons[1]
    self.skel = self.world.skeletons[1]
    # NOTE(review): Kp is 800 here but the PDController below is built with
    # gain 400 -- confirm which gain is actually intended.
    self.Kp, self.Kd = 800., 40.
    self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)

    self.ref_motion = yf.readBvhFile("../data/cmu_tpose.bvh")[:rnn_len]
    self.ref_motion_len = rnn_len

    # Separate kinematic world used to pose the reference skeleton.
    self.ref_world = pydart.World(1./1200., skel_file)
    self.ref_skel = self.ref_world.skeletons[1]
    self.step_per_frame = 40

    self.rsi = True  # reference-state initialization on reset

    # self.w_g = 0.3
    # self.w_p = 0.65 * .7
    # self.w_v = 0.1 * .7
    # self.w_e = 0.15 * .7
    # self.w_c = 0.1 * .7
    # Reward weights (pose, velocity, end-effector, COM) and the exponent
    # scales of the corresponding exp() reward terms.
    self.w_p = 0.65
    self.w_v = 0.1
    self.w_e = 0.15
    self.w_c = 0.1
    self.exp_g = 2.5
    self.exp_p = 2.
    self.exp_v = 0.1
    self.exp_e = 40.
    self.exp_c = 10.

    self.body_num = self.skel.num_bodynodes()
    # End-effector bodies tracked by the reward: both feet, both hands.
    self.idx_e = [self.skel.bodynode_index('LeftFoot'),
                  self.skel.bodynode_index('RightFoot'),
                  self.skel.bodynode_index('LeftHand'),
                  self.skel.bodynode_index('RightHand')]
    self.body_e = list(map(self.skel.body, self.idx_e))
    self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

    self.motion_len = len(self.ref_motion)
    self.motion_time = len(self.ref_motion) / self.ref_motion.fps

    # Observation: 2 goal scalars + per-body kinematics + reference terms.
    state_num = 2 + (3*3 + 4) * self.body_num + (3 + 4) * self.body_num
    # Actions exclude the 6 unactuated root dofs.
    action_num = self.skel.num_dofs() - 6
    state_high = np.array([np.finfo(np.float32).max] * state_num)
    action_high = np.array([pi*10./2.] * action_num)
    self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
    self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

    self.viewer = None
    self.phase_frame = 0

    # Goal bookkeeping and previous-reference snapshot used by the reward.
    self.goals_in_world_frame = list()
    self.goal = np.zeros(2)
    self.prev_ref_q = np.zeros(self.ref_skel.num_dofs())
    self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
    self.prev_ref_p_e_hat = np.asarray([body.world_transform()[:3, 3] for body in self.ref_body_e]).flatten()
    self.prev_ref_com = self.ref_skel.com()
    self.prev_goal = np.zeros(2)
def __init__(self):
    """Set up a multi-clip tracking environment for the 'woody' skeleton.

    Several walking BVH clips are loaded; the active clip is tracked by
    self.ref_motion (selection controlled by specified_motion_num /
    is_motion_specified).
    """
    # Dynamic world at 1200 Hz; skeleton index 1 is the character.
    self.world = pydart.World(1./1200., "../data/woody_with_ground.xml")
    self.world.control_skel = self.world.skeletons[1]
    self.skel = self.world.skeletons[1]
    self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
    self.Kp, self.Kd = 400., 40.
    self.ik = DartIk(self.skel)

    # Reference clips; slices trim lead-in/lead-out frames.
    self.ref_motions = list()  # type: list[ym.Motion]
    self.ref_motions.append(yf.readBvhFile("../data/woody_walk_normal.bvh")[40:])
    self.ref_motions.append(yf.readBvhFile("../data/wd2_WalkForwardVFast00.bvh"))
    self.ref_motions.append(yf.readBvhFile("../data/wd2_WalkForwardSlow01.bvh"))
    self.ref_motions.append(yf.readBvhFile("../data/walk_right_45degree.bvh")[21:134])
    self.ref_motions.append(yf.readBvhFile("../data/walk_left_45degree.bvh")[26:141])
    # self.ref_motions.append(yf.readBvhFile("../data/walk_left_90degree.bvh")[21:153])
    # self.ref_motions.append(yf.readBvhFile('../data/wd2_WalkSukiko00.bvh'))
    # self.ref_motions.append(yf.readBvhFile("../data/wd2_jump0.bvh")[164:280])
    # self.ref_motions.append(yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214])
    # self.ref_motions[-1].translateByOffset([0., 0.03, 0.])
    self.motion_num = len(self.ref_motions)
    # Per-clip reward weights compensating for differing clip frame rates.
    self.reward_weights_by_fps = [self.ref_motions[0].fps / self.ref_motions[i].fps for i in range(self.motion_num)]
    self.ref_motion = self.ref_motions[0]

    # Separate kinematic world used to pose the reference skeleton.
    self.ref_world = pydart.World(1./1200., "../data/woody_with_ground.xml")
    self.ref_skel = self.ref_world.skeletons[1]
    # Set per active clip elsewhere (depends on the clip's fps).
    self.step_per_frame = None

    self.rsi = True  # reference-state initialization on reset
    # Clip selection: fixed clip index when is_motion_specified is True.
    self.specified_motion_num = 0
    self.is_motion_specified = False

    # self.w_g = 0.3
    # self.w_p = 0.65 * .7
    # self.w_v = 0.1 * .7
    # self.w_e = 0.15 * .7
    # self.w_c = 0.1 * .7
    # Reward weights (pose, velocity, end-effector, COM) and the exponent
    # scales of the corresponding exp() reward terms.
    self.w_p = 0.65
    self.w_v = 0.1
    self.w_e = 0.15
    self.w_c = 0.1
    self.exp_g = 2.5
    self.exp_p = 2.
    self.exp_v = 0.1
    self.exp_e = 40.
    self.exp_c = 10.

    self.body_num = self.skel.num_bodynodes()
    # End-effector bodies tracked by the reward: both feet, both forearms.
    self.idx_e = [self.skel.bodynode_index('LeftFoot'),
                  self.skel.bodynode_index('RightFoot'),
                  self.skel.bodynode_index('LeftForeArm'),
                  self.skel.bodynode_index('RightForeArm')]
    self.body_e = list(map(self.skel.body, self.idx_e))
    self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

    self.motion_len = len(self.ref_motion)

    # Goal trajectory bookkeeping: a sliding window of upcoming goals.
    self.goals_in_world_frame = list()
    self.goal = np.zeros(2)
    self.goals = list()
    self.goal_window = 45

    # Observation: goal window (3 scalars per goal) + per-body kinematics.
    state_num = 3*self.goal_window + (3*3 + 4) * self.body_num
    # action_num = self.skel.num_dofs() - 6
    # NOTE(review): here the action covers ALL dofs including the root,
    # unlike the sibling environments -- confirm this is intentional.
    action_num = self.skel.num_dofs()
    state_high = np.array([np.finfo(np.float32).max] * state_num)
    action_high = np.array([pi*2.] * action_num)
    self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
    self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

    self.viewer = None
    self.phase_frame = 0

    # Previous-reference snapshot used by the reward.
    self.prev_ref_q = np.zeros(self.ref_skel.num_dofs())
    self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
    self.prev_ref_p_e_hat = np.asarray([body.world_transform()[:3, 3] for body in self.ref_body_e]).flatten()
    self.prev_ref_com = self.ref_skel.com()

    # setting for reward
    # Joints whose tracking error contributes to the pose reward term.
    self.reward_joint = list()
    self.reward_joint.append('j_RightUpLeg')
    self.reward_joint.append('j_RightLeg')
    self.reward_joint.append('j_LeftUpLeg')
    self.reward_joint.append('j_LeftLeg')
    self.reward_joint.append('j_Spine')
    self.reward_joint.append('j_Spine1')
    self.reward_joint.append('j_RightArm')
    self.reward_joint.append('j_RightForeArm')
    self.reward_joint.append('j_LeftArm')
    self.reward_joint.append('j_LeftForeArm')
class HpDartEnv(gym.Env):
    """Single-clip motion tracking environment for the 'woody' skeleton.

    One BVH clip (chosen by env_name) is tracked with a PD controller; the
    reward compares simulated pose, velocity, end effectors and COM against
    the reference frame at the current simulation time.
    """

    def __init__(self, env_name='walk', env_slaves=1):
        # Dynamic world at 1200 Hz; skeleton index 1 is the character.
        self.world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
        self.env_name = env_name

        # Reference clip, chosen by env_name; slices trim lead-in/lead-out.
        self.ref_motion = None  # type: ym.Motion
        if env_name == 'walk':
            self.ref_motion = yf.readBvhFile(
                "../data/woody_walk_normal.bvh")[40:]
        elif env_name == 'spiral_walk':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_spiral_walk_normal05.bvh")
        elif env_name == 'walk_spin':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_2foot_walk_turn2.bvh")
        elif env_name == 'jump':
            self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")[164:280]
        elif env_name == 'walk_fast':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_WalkForwardVFast00.bvh")
        elif env_name == 'walk_left_90':
            self.ref_motion = yf.readBvhFile("../data/walk_left_90degree.bvh")
        elif env_name == 'walk_left_45':
            self.ref_motion = yf.readBvhFile("../data/walk_left_45degree.bvh")
        elif env_name == 'walk_pick':
            self.ref_motion = yf.readBvhFile("../data/wd2_pick_walk_1.bvh")
        elif env_name == 'walk_u_turn':
            self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214]
            self.ref_motion.translateByOffset([0., 0.03, 0.])
        elif env_name == 'jump_whole':
            self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")
        elif env_name == 'walk_u_turn_whole':
            self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")
            self.ref_motion.translateByOffset([0., 0.03, 0.])

        # Separate kinematic world used to pose the reference skeleton.
        self.ref_world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        # Simulation sub-steps per reference-motion frame.
        self.step_per_frame = round(
            (1. / self.world.time_step()) / self.ref_motion.fps)

        self.rsi = True  # reference-state initialization on reset

        # Reward weights (pose, velocity, end-effector, COM) and the
        # exponent scales of the corresponding exp() reward terms.
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # End-effector bodies tracked by the reward: both feet, both forearms.
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftForeArm'),
            self.skel.bodynode_index('RightForeArm')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

        self.total_time = len(self.ref_motion) / self.ref_motion.fps
        self.time_offset = 0.

        # Observation: 1 phase scalar + (3 vectors of 3 + quaternion) per body.
        state_num = 1 + (3 * 3 + 4) * self.body_num
        # Actions exclude the 6 unactuated root dofs.
        action_num = self.skel.num_dofs() - 6
        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)
        self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

        self.viewer = None

    def state(self):
        """Observation: clip phase + per-body positions (pelvis-relative),
        orientations (quaternion in pelvis frame) and world velocities."""
        p_pelvis = self.skel.body(0).world_transform()[:3, 3]
        R_pelvis = self.skel.body(0).world_transform()[:3, :3]
        # Normalized time within the clip.
        phase = (self.world.time() + self.time_offset) / self.total_time
        state = [phase]

        p = np.array([
            self.skel.body(i).to_world() - p_pelvis
            for i in range(self.body_num)
        ]).flatten()
        # R = [mm.logSO3(np.dot(R_pelvis.T, self.skel.body(i).world_transform()[:3, :3]))/pi for i in range(self.body_num)]
        R = np.array([
            mm.rot2quat(
                np.dot(R_pelvis.T,
                       self.skel.body(i).world_transform()[:3, :3]))
            for i in range(self.body_num)
        ]).flatten()
        v = np.array([
            self.skel.body(i).world_linear_velocity()
            for i in range(self.body_num)
        ]).flatten()
        w = np.array([
            self.skel.body(i).world_angular_velocity() / 20.
            for i in range(self.body_num)
        ]).flatten()

        state.extend(p)
        state.extend(R)
        state.extend(v)
        state.extend(w)
        # print('p', np.linalg.norm(p))
        # print('R', np.linalg.norm(R))
        # print('v', np.linalg.norm(v))
        # print('w', np.linalg.norm(w))
        return np.asarray(state).flatten()

    def reward(self):
        """Tracking reward vs. the reference frame at the current sim time.

        Side effect: poses ref_skel at that frame before comparing.
        """
        current_frame = min(
            len(self.ref_motion) - 1,
            int((self.world.time() + self.time_offset) * self.ref_motion.fps))
        self.ref_skel.set_positions(self.ref_motion[current_frame].get_q())
        self.ref_skel.set_velocities(self.ref_motion.get_dq(current_frame))

        p_e_hat = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        p_e = np.asarray(
            [body.world_transform()[:3, 3] for body in self.body_e]).flatten()

        return exp_reward_term(self.w_p, self.exp_p, self.skel.q, self.ref_skel.q) \
            + exp_reward_term(self.w_v, self.exp_v, self.skel.dq, self.ref_skel.dq) \
            + exp_reward_term(self.w_e, self.exp_e, p_e, p_e_hat) \
            + exp_reward_term(self.w_c, self.exp_c, self.skel.com(), self.ref_skel.com())

    def is_done(self):
        """Episode ends on fall, NaN state, or when the clip runs out."""
        if self.skel.com()[1] < 0.4:
            return True
        elif True in np.isnan(np.asarray(self.skel.q)) or True in np.isnan(
                np.asarray(self.skel.dq)):
            return True
        elif self.world.time() + self.time_offset > self.total_time:
            return True
        return False

    def step(self, _action):
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done, info).

        # Arguments
            action (object): An action provided by the environment.

        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float): Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further
                step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information.
        """
        # Action is a per-dof offset on the reference pose; root gets zeros.
        action = np.hstack((np.zeros(6), _action / 10.))

        current_frame = min(
            len(self.ref_motion) - 1,
            int((self.world.time() + self.time_offset) * self.ref_motion.fps))
        self.ref_skel.set_positions(self.ref_motion[current_frame].get_q())
        self.ref_skel.set_velocities(self.ref_motion.get_dq(current_frame))

        for i in range(self.step_per_frame):
            self.skel.set_forces(
                self.pdc.compute_flat(self.ref_skel.q + action))
            self.world.step()

        return tuple([self.state(), self.reward(), self.is_done(), dict()])

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        # Returns
            observation (object): The initial observation of the space.
                Initial reward is assumed to be 0.
        """
        self.world.reset()

        # Reference-state initialization: start from a random clip frame.
        # rand_frame = randrange(0, len(self.ref_motion)//2)
        rand_frame = randrange(0, len(self.ref_motion))
        if not self.rsi:
            rand_frame = 0
        self.time_offset = rand_frame / self.ref_motion.fps

        self.skel.set_positions(self.ref_motion[rand_frame].get_q())
        dq = self.ref_motion.get_dq(rand_frame)
        # Root linear velocity taken from the clip's local-frame dofs.
        dq[3:6] = np.asarray(
            self.ref_motion.getDOFVelocitiesLocal(rand_frame)[0][:3])
        self.skel.set_velocities(dq)
        # self.skel.set_velocities(self.ref_motion.get_dq(rand_frame))

        return self.state()

    def render(self, mode='human', close=False):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.)

        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        # Lazily build a 2D debug viewer: black dots for the simulated
        # bodies, red dots for the reference bodies.
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            # rod = rendering.make_capsule(1, .2)
            # rod.set_color(.8, .3, .3)
            # rod.add_attr(self.pole_transform)
            # self.viewer.add_geom(rod)
            self.body_transform = list()
            self.ref_body_transform = list()
            for i in range(self.body_num):
                axle = rendering.make_circle(.05)
                axle.set_color(0, 0, 0)
                self.body_transform.append(rendering.Transform())
                axle.add_attr(self.body_transform[i])
                self.viewer.add_geom(axle)
            for i in range(self.body_num):
                axle = rendering.make_circle(.05)
                axle.set_color(1, 0, 0)
                self.ref_body_transform.append(rendering.Transform())
                axle.add_attr(self.ref_body_transform[i])
                self.viewer.add_geom(axle)

        # Project world x/y body positions into the viewer (x shifted by -1).
        for i in range(self.body_num):
            self.body_transform[i].set_translation(
                self.skel.body(i).world_transform()[:3, 3][0] - 1.,
                self.skel.body(i).world_transform()[:3, 3][1])
            self.ref_body_transform[i].set_translation(
                self.ref_skel.body(i).world_transform()[:3, 3][0] - 1.,
                self.ref_skel.body(i).world_transform()[:3, 3][1])

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    # --- Thin adapters for a multi-slave training interface. ---

    def GetState(self, param):
        return self.state()

    def GetStates(self):
        return [self.state()]

    def Resets(self, rsi):
        self.rsi = rsi
        self.reset()

    def Reset(self, rsi, param1):
        self.rsi = rsi
        self.reset()

    def Steps(self, actions):
        return self.step(actions[0])

    def IsTerminalState(self, j):
        return self.is_done()

    def GetReward(self, j):
        return self.reward()

    def IsTerminalStates(self):
        return [self.is_done()]
def __init__(self, env_slaves=1): self.world = pydart.World(1. / 1200., "../data/woody_with_ground.xml") self.world.control_skel = self.world.skeletons[1] self.skel = self.world.skeletons[1] self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.) self.Kp, self.Kd = 400., 40. self.ref_motions = list() # type: list[ym.Motion] self.ref_motions.append( yf.readBvhFile("../data/woody_walk_normal.bvh")[40:]) # self.ref_motions.append(yf.readBvhFile("../data/wd2_jump0.bvh")[164:280]) self.ref_motions.append(yf.readBvhFile('../data/wd2_WalkSukiko00.bvh')) self.ref_motions.append( yf.readBvhFile("../data/walk_left_90degree.bvh")) self.ref_motions.append( yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214]) self.ref_motions[-1].translateByOffset([0., 0.03, 0.]) self.ref_motions.append( yf.readBvhFile("../data/wd2_WalkForwardVFast00.bvh")) self.motion_num = len(self.ref_motions) self.reward_weights_by_fps = [ self.ref_motions[0].fps / self.ref_motions[i].fps for i in range(self.motion_num) ] self.ref_motion = self.ref_motions[0] self.ref_world = pydart.World(1. / 1200., "../data/woody_with_ground.xml") self.ref_skel = self.ref_world.skeletons[1] self.step_per_frame = None self.rsi = True self.specified_motion_num = 0 self.is_motion_specified = False self.w_p = 0.65 self.w_v = 0.1 self.w_e = 0.15 self.w_c = 0.1 self.exp_p = 2. self.exp_v = 0.1 self.exp_e = 40. self.exp_c = 10. self.body_num = self.skel.num_bodynodes() self.idx_e = [ self.skel.bodynode_index('LeftFoot'), self.skel.bodynode_index('RightFoot'), self.skel.bodynode_index('LeftForeArm'), self.skel.bodynode_index('RightForeArm') ] self.body_e = list(map(self.skel.body, self.idx_e)) self.ref_body_e = list(map(self.ref_skel.body, self.idx_e)) self.motion_time = len(self.ref_motion) / self.ref_motion.fps self.time_offset = 0. state_num = 2 + (3 * 3 + 4) * self.body_num action_num = self.skel.num_dofs() - 6 state_high = np.array([np.finfo(np.float32).max] * state_num) action_high = np.array([pi * 10. / 2.] 
* action_num) self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32) self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32) self.viewer = None
class HpDartMultiEnv(gym.Env):
    """Motion-imitation gym environment over multiple reference BVH clips.

    A pydart-simulated character tracks one reference clip per episode with a
    PD controller; the reward compares the simulated pose, velocities,
    end-effector positions and COM against a kinematically-posed reference
    skeleton. The first observation element encodes which clip is active.
    """

    def __init__(self, env_slaves=1):
        # Simulated world, controlled skeleton, and PD tracking controller.
        self.world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
        self.Kp, self.Kd = 400., 40.

        # Reference clips; list order defines the motion index used in the
        # observation and in reward_weights_by_fps.
        self.ref_motions = list()  # type: list[ym.Motion]
        self.ref_motions.append(
            yf.readBvhFile("../data/woody_walk_normal.bvh")[40:])
        # self.ref_motions.append(yf.readBvhFile("../data/wd2_jump0.bvh")[164:280])
        self.ref_motions.append(yf.readBvhFile('../data/wd2_WalkSukiko00.bvh'))
        self.ref_motions.append(
            yf.readBvhFile("../data/walk_left_90degree.bvh"))
        self.ref_motions.append(
            yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214])
        # Raise only the u-turn clip by 3 cm.
        self.ref_motions[-1].translateByOffset([0., 0.03, 0.])
        self.ref_motions.append(
            yf.readBvhFile("../data/wd2_WalkForwardVFast00.bvh"))
        self.motion_num = len(self.ref_motions)
        # Scale rewards so clips with differing fps contribute comparably,
        # using the first clip's fps as the baseline.
        self.reward_weights_by_fps = [
            self.ref_motions[0].fps / self.ref_motions[i].fps
            for i in range(self.motion_num)
        ]
        self.ref_motion = self.ref_motions[0]

        # Kinematic-only world used purely to pose the reference skeleton.
        self.ref_world = pydart.World(1. / 1200.,
                                      "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        self.step_per_frame = None  # derived in reset() from the chosen clip

        self.rsi = True
        self.specified_motion_num = 0
        self.is_motion_specified = False

        # Reward mixture weights (pose, velocity, end-effector, COM) ...
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1
        # ... and the exponential fall-off scale of each term.
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # End-effector bodies tracked by the reward: feet and forearms.
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftForeArm'),
            self.skel.bodynode_index('RightForeArm')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps
        self.time_offset = 0.

        # Observation: [motion id, phase] + per-body features (3 x 3-vectors
        # and one quaternion each). Action: every dof except the 6 root dofs.
        state_num = 2 + (3 * 3 + 4) * self.body_num
        action_num = self.skel.num_dofs() - 6
        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)
        self.action_space = gym.spaces.Box(-action_high, action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high, state_high,
                                                dtype=np.float32)
        self.viewer = None

    def state(self):
        """Build the observation: [motion id, phase] followed by each body's
        position, orientation (quaternion), linear and angular velocity,
        all expressed in the pelvis frame."""
        pelvis = self.skel.body(0)
        p_pelvis = pelvis.world_transform()[:3, 3]
        R_pelvis = pelvis.world_transform()[:3, :3]
        # Phase in [0, 1] along the current reference clip.
        phase = min(1.,
                    (self.world.time() + self.time_offset) / self.motion_time)
        # Motion id normalized to [0, 1] across the clip list.
        state = [
            0. if len(self.ref_motions) == 1 else self.specified_motion_num /
            (len(self.ref_motions) - 1), phase
        ]

        p = np.array([
            np.dot(R_pelvis.T, body.to_world() - p_pelvis)
            for body in self.skel.bodynodes
        ]).flatten()
        R = np.array([
            mm.rot2quat(np.dot(R_pelvis.T, body.world_transform()[:3, :3]))
            for body in self.skel.bodynodes
        ]).flatten()
        v = np.array([
            np.dot(R_pelvis.T, body.world_linear_velocity())
            for body in self.skel.bodynodes
        ]).flatten()
        # Angular velocity divided by 20 — presumably an empirical scaling to
        # keep feature magnitudes comparable; confirm before changing.
        w = np.array([
            np.dot(R_pelvis.T, body.world_angular_velocity()) / 20.
            for body in self.skel.bodynodes
        ]).flatten()

        state.extend(p)
        state.extend(R)
        state.extend(v)
        state.extend(w)
        # print('p', np.linalg.norm(p))
        # print('R', np.linalg.norm(R))
        # print('v', np.linalg.norm(v))
        # print('w', np.linalg.norm(w))
        return np.asarray(state).flatten()

    def reward(self):
        """Imitation reward: weighted exponential terms for pose, velocity,
        end-effector positions and COM, scaled by the clip's fps weight.

        Side effect: re-poses the reference skeleton at the current time.
        """
        # current_frame = min(len(self.ref_motion)-1, int((self.world.time() + self.time_offset) * self.ref_motion.fps))
        current_time = self.world.time() + self.time_offset
        self.ref_skel.set_positions(
            self.ref_motion.get_q_by_time(current_time))
        self.ref_skel.set_velocities(
            self.ref_motion.get_dq_dart_by_time(current_time))

        p_e_hat = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        p_e = np.asarray(
            [body.world_transform()[:3, 3] for body in self.body_e]).flatten()

        reward = exp_reward_term(self.w_p, self.exp_p, self.skel.q, self.ref_skel.q) \
            + exp_reward_term(self.w_v, self.exp_v, self.skel.dq, self.ref_skel.dq) \
            + exp_reward_term(self.w_e, self.exp_e, p_e, p_e_hat) \
            + exp_reward_term(self.w_c, self.exp_c, self.skel.com(), self.ref_skel.com())
        return reward * self.reward_weights_by_fps[self.specified_motion_num]

    def is_done(self):
        """Episode ends on a fall (COM height below 0.45), numerical blow-up
        (NaN in positions/velocities), or when the clip's duration elapses."""
        if self.skel.com()[1] < 0.45:
            return True
        elif True in np.isnan(np.asarray(self.skel.q)) or True in np.isnan(
                np.asarray(self.skel.dq)):
            return True
        elif self.world.time() + self.time_offset > self.motion_time:
            return True
        return False

    def step(self, _action):
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done, info).

        # Arguments
            action (object): An action provided by the environment.

        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float) : Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further
                step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for
                debugging, and sometimes learning).
        """
        # Scale the policy output down; the 6 root dofs get no action offset.
        action = np.hstack((np.zeros(6), _action / 10.))

        # PD target: the reference pose one control frame ahead of now.
        next_frame_time = self.world.time(
        ) + self.time_offset + self.world.time_step() * self.step_per_frame
        self.ref_skel.set_positions(
            self.ref_motion.get_q_by_time(next_frame_time))
        self.ref_skel.set_velocities(
            self.ref_motion.get_dq_dart_by_time(next_frame_time))

        # One control frame = step_per_frame simulation substeps.
        for i in range(self.step_per_frame):
            # self.skel.set_forces(self.skel.get_spd(self.ref_skel.q + action, self.world.time_step(), self.Kp, self.Kd))
            self.skel.set_forces(
                self.pdc.compute_flat(self.ref_skel.q + action))
            self.world.step()

        return tuple([self.state(), self.reward(), self.is_done(), dict()])

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        # Returns
            observation (object): The initial observation of the space.
                Initial reward is assumed to be 0.
        """
        self.world.reset()

        # Pick a clip at random unless one was pinned via specify_motion_num().
        if not self.is_motion_specified:
            self.specified_motion_num = randrange(0, self.motion_num)
        self.ref_motion = self.ref_motions[self.specified_motion_num]
        self.step_per_frame = round(
            (1. / self.world.time_step()) / self.ref_motion.fps)
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps

        # Reference State Initialization: random start frame when rsi is on.
        rand_frame = randrange(0, len(self.ref_motion)) if self.rsi else 0
        self.time_offset = rand_frame / self.ref_motion.fps

        self.skel.set_positions(self.ref_motion.get_q_by_time(
            self.time_offset))
        dq = self.ref_motion.get_dq_dart_by_time(self.time_offset)
        self.skel.set_velocities(dq)
        self.ref_skel.set_positions(
            self.ref_motion.get_q_by_time(self.time_offset))

        return self.state()

    def render(self, mode='human', close=False):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.)

        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        # NOTE(review): self.viewer is initialized to None in __init__ and is
        # never created in this class, so calling render() as written raises
        # AttributeError — confirm whether a viewer is attached externally.
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    # The methods below adapt this single environment to a multi-slave style
    # interface; index/param arguments are accepted but ignored.
    def GetState(self, param):
        return self.state()

    def GetStates(self):
        return [self.state()]

    def Resets(self, rsi):
        self.rsi = rsi
        self.reset()

    def Reset(self, rsi, param1):
        self.rsi = rsi
        self.reset()

    def Steps(self, actions):
        return self.step(actions[0])

    def IsTerminalState(self, j):
        return self.is_done()

    def GetReward(self, j):
        return self.reward()

    def IsTerminalStates(self):
        return [self.is_done()]

    def specify_motion_num(self, num=-1):
        """Pin the reference clip to index `num`; any out-of-range value
        (including the default -1) re-enables random clip selection at
        reset()."""
        if 0 <= num < self.motion_num:
            self.is_motion_specified = True
            self.specified_motion_num = num
        else:
            self.is_motion_specified = False