def get_viewer(self): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) return self.viewer
def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(init_width=50, init_height=50) self.viewer.start() self.viewer.set_model(self.model) #self.viewer.cam.elevation = -42.59999990463257 return self.viewer
class ReacherTwoEnv(MujocoEnv, Serializable): FILE = 'reacher.xml' def __init__(self, *args, **kwargs): super(ReacherTwoEnv, self).__init__(*args, **kwargs) Serializable.quick_init(self, locals()) def step(self, a): vec = self.get_body_com("fingertip") - self.get_body_com("target") reward_dist = -np.linalg.norm(vec) reward_ctrl = 0 #- np.square(a).sum() #reward_close = 0.01*math.log(-reward_dist) reward = reward_dist + reward_ctrl #+ reward_close self.forward_dynamics(a) next_obs = self.get_current_obs() return Step(next_obs, reward, False) #done = False #return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) def viewer_setup(self): self.viewer.cam.trackbodyid = 0 def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(init_width=25, init_height=25) self.viewer.start() self.viewer.set_model(self.model) return self.viewer def reset_mujoco(self, init_state=None): qpos = np.random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos.flat #while True: # self.goal = np.random.uniform(low=-.2, high=.2, size=2) # if np.linalg.norm(self.goal) < 2: break self.goal = np.array([0.1, 0.1]) qpos[-2:] = self.goal qvel = self.init_qvel.flat + np.random.uniform( low=-.005, high=.005, size=self.model.nv) qvel[-2:] = 0 self.model.data.qpos = qpos self.model.data.qvel = qvel self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl return self.get_current_obs() def get_current_obs(self): theta = self.model.data.qpos.flat[:2] return np.concatenate([ np.cos(theta), np.sin(theta), self.model.data.qpos.flat[2:], self.model.data.qvel.flat[:2], self.get_body_com("fingertip") - self.get_body_com("target") ]) reset_trial = reset_mujoco # shortcut for compatibility.
def get_viewer(self, config=None): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) if config is not None: self.viewer.set_window_pose(config["xpos"], config["ypos"]) self.viewer.set_window_size(config["width"], config["height"]) self.viewer.set_window_title(config["title"]) return self.viewer
class BlockEnv(MujocoEnv, Serializable): global FILE def __init__(self, *args, **kwargs): FILE = './block_stack_{}.xml'.format(num_blocks) # FILE = './block_stack_2.xml' super(BlockEnv, self).__init__(*args, **kwargs, file_path=FILE) Serializable.quick_init(self, locals()) def reset(self, init_state=None): """ :param init_state: (2, 2) positions for the first and the second block. :return: set geom_xpos """ if init_state is None: init_state = (np.random.rand(num_blocks, 2) * loc) - (loc / 2) init_pos = np.zeros((num_blocks * 3)) init_pos[:2] = init_state[0] - self.model.body_pos[1, :2] init_pos[3:5] = init_state[1] - self.model.body_pos[2, :2] full_state = np.concatenate([ init_pos, self.init_qvel.squeeze(), self.init_qacc.squeeze(), self.init_ctrl.squeeze() ]) obs = super(BlockEnv, self).reset(init_state=full_state) return obs def get_current_obs(self): return self.model.data.geom_xpos def viewer_setup(self, config=None): viewer = self.get_viewer() viewer.cam.trackbodyid = 0 viewer.cam.distance = 2.75 viewer.cam.elevation = -60 def step(self, action): done = False reward = 0 self.forward_dynamics(action) next_obs = self.get_current_obs() return Step(next_obs, reward, done) def step_only(self, action): next_state = self.get_current_obs()[1:, :2] + action.reshape(2, 2) return self.reset(init_state=next_state) def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(visible=False) self.viewer.start() self.viewer.set_model(self.model) return self.viewer
class InvertedPendulumTwoEnv(MujocoEnv, Serializable): FILE = 'inverted_pend_two.xml' def __init__(self, *args, **kwargs): super(InvertedPendulumTwoEnv, self).__init__(*args, **kwargs) Serializable.quick_init(self, locals()) def step(self, a): reward = 1.0 self.forward_dynamics(a) ob = self.get_current_obs() notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2) #done = not notdone done = False if not notdone: reward = 0 return Step(ob, reward, done) def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(init_width=50, init_height=50) self.viewer.start() self.viewer.set_model(self.model) #self.viewer.cam.elevation = -42.59999990463257 return self.viewer def reset_mujoco(self, init_state=None): qpos = self.init_qpos + np.random.uniform( size=self.model.nq, low=-0.01, high=0.01) qvel = self.init_qvel + np.random.uniform( size=self.model.nv, low=-0.01, high=0.01) self.model.data.qpos = qpos self.model.data.qvel = qvel self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl return self.get_current_obs() reset_trial = reset_mujoco def get_current_obs(self): return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel() def viewer_setup(self): v = self.viewer v.cam.trackbodyid = 0 v.cam.distance = v.model.stat.extent
class HalfCheetahSimulator(BaseSimulator): def __init__(self, **kwargs): super(HalfCheetahSimulator, self).__init__(**kwargs) self._imSz = 512 self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) self.model = MjModel( '../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') self.viewer = None self._pos = {} self._pos['torso'] = np.zeros((3, )) self._range_min = -1 self._range_max = 1 self.body_comvel = 0 self.action = np.zeros((1, 2)) self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.frame_skip = 1 @overrides def step(self, ctrl, loop=False): ctrl = np.clip( ctrl, *(np.array([-1, -1, -1, -1, -1, -1]), np.array([1, 1, 1, 1, 1, 1]))) self.model.data.ctrl = ctrl # + np.random.normal(size=ctrl.shape) # print('gym', self.model.data.ctrl) for i in range(self.frame_skip): self.model.step() self.model.forward() ind = self.model.body_names.index('torso') self._pos['torso'] = self.model.body_pos[ind] self.body_comvel = self.model.body_comvels[ind] self.action = ctrl @overrides def get_image(self): data, width, height = self.viewer.get_image() self._im = np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :] # print(self._im) return self._im.copy() @overrides def _setup_renderer(self): self.viewer = MjViewer(visible=True, init_width=self._imSz, init_height=self._imSz) self.viewer.start() self.viewer.set_model(self.model) @overrides def render(self): self.viewer.loop_once()
class MujocoEnv(Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__(self, action_noise=0.0, file_path=None, template_args=None): # compile template if file_path is None: if self.__class__.FILE is None: raise "Mujoco file not specified" file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template( template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(text=True) with open(file_path, 'w') as f: f.write(content) self.model = MjModel(file_path) os.close(tmp_f) else: self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self, init_state=None): if init_state is None: self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.model.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start: start + datum_dim] setattr(self.model.data, datum_name, datum) start += datum_dim @overrides def reset(self, init_state=None): self.reset_mujoco(init_state) self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate([ self.model.data.qpos.flat, self.model.data.qvel.flat ]) @property def _full_state(self): return np.concatenate([ self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) return self.viewer def render(self): viewer = self.get_viewer() viewer.loop_once() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() def release(self): # temporarily alleviate the issue (but still some leak) from rllab.mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.model.data.qpos)) def action_from_key(self, key): raise NotImplementedError
def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(init_height=1000, init_width=1000) self.viewer.start() self.viewer.set_model(self.model) return self.viewer
class SwimmerUnevenFloorEnv(MujocoEnv, Serializable): """ This SwimmerEnv differs from the one in rllab.envs.mujoco.swimmer_env in the additional initialization options that fix the rewards and observation space. The 'com' is added to the env_infos. Also the get_ori() method is added for the Maze and Gather tasks. """ FILE = 'swimmer_hfield.xml' ORI_IND = 2 @autoargs.arg('ctrl_cost_coeff', type=float, help='cost coefficient for controls') def __init__(self, ctrl_cost_coeff=1e-2, ego_obs=False, sparse_rew=False, *args, **kwargs): self.ctrl_cost_coeff = ctrl_cost_coeff self.ego_obs = ego_obs self.sparse_rew = sparse_rew super(SwimmerUnevenFloorEnv, self).__init__(*args, **kwargs) Serializable.quick_init(self, locals()) def get_current_obs(self): if self.ego_obs: return np.concatenate([ self.model.data.qpos.flat[2:], self.model.data.qvel.flat, ]).reshape(-1) else: return np.concatenate([ self.model.data.qpos.flat, self.model.data.qvel.flat, self.get_body_com("torso").flat, ]).reshape(-1) def get_ori(self): return self.model.data.qpos[self.__class__.ORI_IND] def step(self, action): self.forward_dynamics(action) next_obs = self.get_current_obs() lb, ub = self.action_bounds scaling = (ub - lb) * 0.5 ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum( np.square(action / scaling)) forward_reward = np.linalg.norm(self.get_body_comvel( "torso")) # swimmer has no problem of jumping reward reward = forward_reward - ctrl_cost done = False if self.sparse_rew: if abs(self.get_body_com("torso")[0]) > 100.0: reward = 1.0 done = True else: reward = 0. com = np.concatenate([self.get_body_com("torso").flat]).reshape(-1) ori = self.get_ori() return Step(next_obs, reward, done, com=com, ori=ori) @overrides def log_diagnostics(self, paths, prefix=''): progs = [ np.linalg.norm(path["env_infos"]["com"][-1] - path["env_infos"]["com"][0]) for path in paths ] logger.record_tabular_misc_stat('Progress', progs) self.plot_visitations(paths, visit_prefix=prefix) @overrides def get_viewer(self): if self.viewer is None: self.viewer = MjViewer(init_height=1000, init_width=1000) self.viewer.start() self.viewer.set_model(self.model) return self.viewer
class MujocoEnv(Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__( self, action_noise=0.0, file_path=None, template_args=None, random_init_state=False, ): #Haoran: even if random_init_state # compile template if file_path is None: if self.__class__.FILE is None: raise "Mujoco file not specified" file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template(template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(text=True) with open(file_path, 'w') as f: f.write(content) self.model = MjModel(file_path) os.close(tmp_f) # os.remove(file_path) # TODO else: self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.init_damping = (self.model.dof_damping[:, 0]).copy() self.init_armature = (self.model.dof_armature[:, 0]).copy() self.init_frictionloss = (self.model.dof_frictionloss[:, 0]).copy() self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise self.random_init_state = random_init_state if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self, init_state=None): # init_state = (0.387, 1.137, -2.028, -1.744, 2.029, -0.873, 1.55, 0, 0) if init_state is None: if self.random_init_state: self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 else: self.model.data.qpos = self.init_qpos self.model.data.qvel = self.init_qvel self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl else: start = 0 for datum_name in ["qpos", "qvel", "qacc", "ctrl"]: datum = getattr(self.model.data, datum_name) datum_dim = datum.shape[0] datum = init_state[start:start + datum_dim] if len(datum) == 0: datum = getattr(self, 'init_' + datum_name) setattr(self.model.data, datum_name, datum) start += datum_dim self.model.forward() # print("inside mujoco reset: ", self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl) @overrides def reset(self, init_state=None, *args, **kwargs): self.reset_mujoco(init_state) self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) # print("outside mujoco reset: ", self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate( [self.model.data.qpos.flat, self.model.data.qvel.flat]) @property def _full_state(self): return np.concatenate([ self.model.data.qpos, self.model.data.qvel, self.model.data.qacc, self.model.data.ctrl, ]).ravel() def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self, config=None): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) if config is not None: self.viewer.set_window_pose(config["xpos"], config["ypos"]) self.viewer.set_window_size(config["width"], config["height"]) self.viewer.set_window_title(config["title"]) return self.viewer def setup_camera(self, cam_pos=None, viewer=None): """Setup camera """ if cam_pos is None: cam_pos = self._params["cam_pos"] if viewer is None: viewer = self._viewer viewer.cam.lookat[0] = cam_pos[0] viewer.cam.lookat[1] = cam_pos[1] viewer.cam.lookat[2] = cam_pos[2] viewer.cam.distance = cam_pos[3] viewer.cam.elevation = cam_pos[4] viewer.cam.azimuth = cam_pos[5] viewer.cam.trackbodyid = -1 def render(self, close=False, mode='human', config=None): if mode == 'human': viewer = self.get_viewer(config=config) viewer.loop_once() elif mode == 'rgb_array': viewer = self.get_viewer(config=config) viewer.loop_once() # self.get_viewer(config=config).render() data, width, height = self.get_viewer(config=config).get_image() return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :] if close: self.stop_viewer() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() self.viewer = None def release(self): # temporarily alleviate the issue (but still some leak) from rllab.mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print("qpos dim:\t%d" % len(self.model.data.qpos)) def action_from_key(self, key): raise NotImplementedError def set_state_tmp(self, state, restore=True): if restore: prev_pos = self.model.data.qpos prev_qvel = self.model.data.qvel prev_ctrl = self.model.data.ctrl prev_act = self.model.data.act qpos, qvel = self.decode_state(state) self.model.data.qpos = qpos self.model.data.qvel = qvel self.model.forward() yield if restore: self.model.data.qpos = prev_pos self.model.data.qvel = prev_qvel self.model.data.ctrl = prev_ctrl self.model.data.act = prev_act self.model.forward() def get_param_values(self): return {} def set_param_values(self, values): pass
class PR2Env(MujocoEnv, Serializable): FILE = 'pr2_legofree.xml' def __init__(self, model='pr2.xml', *args, **kwargs): self.goal = None self.action_penalty = 0.5 * 1e-2 if model not in [None, 0]: self.set_model(model) super(PR2Env, self).__init__(*args, **kwargs) Serializable.quick_init(self, locals()) def set_model(self, model): self.__class__.FILE = model def get_current_obs(self): return np.concatenate([ self.model.data.qpos.flat[:-3], self.model.data.qvel. flat[: -3], # Do not include the velocity of the target (should be 0). self.get_vec_tip_to_goal(), ]).reshape(-1) def get_tip_position(self): return self.model.data.site_xpos[0] def get_vec_tip_to_goal(self): tip_position = self.get_tip_position() goal_position = self.goal vec_tip_to_goal = goal_position - tip_position return vec_tip_to_goal def step(self, action): vec_tip_to_goal = self.get_vec_tip_to_goal() self.forward_dynamics(action) lb, ub = self.action_bounds scaling = (ub - lb) * 0.5 reward_ctrl = -self.action_penalty * np.sum(np.square( action / scaling)) distance_tip_to_goal = np.sum(np.square(vec_tip_to_goal)) reward_tip = -distance_tip_to_goal reward = reward_tip + reward_ctrl # + reward_occlusion done = False self.time_step += 1 if self.max_path_length and self.time_step > self.max_path_length: done = True ob = self.get_current_obs() return ob, float(reward), done, {} def get_viewer(self): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) self.viewer.cam.camid = 0 return self.viewer @overrides def reset_mujoco(self, qpos=None, qvel=None): qpos = self.init_qpos + np.random.normal( size=self.init_qpos.shape) * 0.01 self.goal = np.random.uniform([0.4, 0.25, 0.6], [0.6, 0.75, 1.]) qpos[-3:, 0] = self.goal qvel = self.init_qvel + np.random.normal( size=self.init_qvel.shape) * 0.1 qvel[-3:, 0] = 0 self.model.data.qpos = qpos self.model.data.qvel = qvel self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl
class MujocoEnv(Env): FILE = None @autoargs.arg('action_noise', type=float, help='Noise added to the controls, which will be ' 'proportional to the action bounds') def __init__(self, action_noise=0.0, file_path=None, template_args=None): # compile template if file_path is None: if self.__class__.FILE is None: raise "Mujoco file not specified" file_path = osp.join(MODEL_DIR, self.__class__.FILE) if file_path.endswith(".mako"): lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template( template_file.read(), lookup=lookup) content = template.render( opts=template_args if template_args is not None else {}, ) tmp_f, file_path = tempfile.mkstemp(text=True) with open(file_path, 'w') as f: f.write(content) self.model = MjModel(file_path) os.close(tmp_f) else: self.model = MjModel(file_path) self.data = self.model.data self.viewer = None self.init_qpos = self.model.data.qpos self.init_qvel = self.model.data.qvel self.init_qacc = self.model.data.qacc self.init_ctrl = self.model.data.ctrl self.qpos_dim = self.init_qpos.size self.qvel_dim = self.init_qvel.size self.ctrl_dim = self.init_ctrl.size self.action_noise = action_noise if "frame_skip" in self.model.numeric_names: frame_skip_id = self.model.numeric_names.index("frame_skip") addr = self.model.numeric_adr.flat[frame_skip_id] self.frame_skip = int(self.model.numeric_data.flat[addr]) else: self.frame_skip = 1 if "init_qpos" in self.model.numeric_names: init_qpos_id = self.model.numeric_names.index("init_qpos") addr = self.model.numeric_adr.flat[init_qpos_id] size = self.model.numeric_size.flat[init_qpos_id] init_qpos = self.model.numeric_data.flat[addr:addr + size] self.init_qpos = init_qpos self.dcom = None self.current_com = None self.reset() super(MujocoEnv, self).__init__() @property @overrides def action_space(self): bounds = self.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] return spaces.Box(lb, ub) @property @overrides def observation_space(self): shp = self.get_current_obs().shape ub = BIG * np.ones(shp) return spaces.Box(ub * -1, ub) @property def action_bounds(self): return self.action_space.bounds def reset_mujoco(self): self.model.data.qpos = self.init_qpos + \ np.random.normal(size=self.init_qpos.shape) * 0.01 self.model.data.qvel = self.init_qvel + \ np.random.normal(size=self.init_qvel.shape) * 0.1 self.model.data.qacc = self.init_qacc self.model.data.ctrl = self.init_ctrl @overrides def reset(self): self.reset_mujoco() self.model.forward() self.current_com = self.model.data.com_subtree[0] self.dcom = np.zeros_like(self.current_com) return self.get_current_obs() def get_current_obs(self): return self._get_full_obs() def _get_full_obs(self): data = self.model.data cdists = np.copy(self.model.geom_margin).flat for c in self.model.data.contact: cdists[c.geom2] = min(cdists[c.geom2], c.dist) obs = np.concatenate([ data.qpos.flat, data.qvel.flat, # data.cdof.flat, data.cinert.flat, data.cvel.flat, # data.cacc.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, data.qfrc_constraint.flat, cdists, # data.qfrc_bias.flat, # data.qfrc_passive.flat, self.dcom.flat, ]) return obs @property def _state(self): return np.concatenate([ self.model.data.qpos.flat, self.model.data.qvel.flat ]) def inject_action_noise(self, action): # generate action noise noise = self.action_noise * \ np.random.normal(size=action.shape) # rescale the noise to make it proportional to the action bounds lb, ub = self.action_bounds noise = 0.5 * (ub - lb) * noise return action + noise def forward_dynamics(self, action): self.model.data.ctrl = self.inject_action_noise(action) for _ in range(self.frame_skip): self.model.step() self.model.forward() new_com = self.model.data.com_subtree[0] self.dcom = new_com - self.current_com self.current_com = new_com def get_viewer(self): if self.viewer is None: self.viewer = MjViewer() self.viewer.start() self.viewer.set_model(self.model) return self.viewer def render(self): viewer = self.get_viewer() viewer.loop_once() def start_viewer(self): viewer = self.get_viewer() if not viewer.running: viewer.start() def stop_viewer(self): if self.viewer: self.viewer.finish() def release(self): # temporarily alleviate the issue (but still some leak) from rllab.mujoco_py.mjlib import mjlib mjlib.mj_deleteModel(self.model._wrapped) mjlib.mj_deleteData(self.data._wrapped) def get_body_xmat(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.xmat[idx].reshape((3, 3)) def get_body_com(self, body_name): idx = self.model.body_names.index(body_name) return self.model.data.com_subtree[idx] def get_body_comvel(self, body_name): idx = self.model.body_names.index(body_name) return self.model.body_comvels[idx] def print_stats(self): super(MujocoEnv, self).print_stats() print "qpos dim:\t%d" % len(self.model.data.qpos) def action_from_key(self, key): raise NotImplementedError
def _setup_renderer(self): self.viewer = MjViewer(visible=True, init_width=self._imSz, init_height=self._imSz) self.viewer.start() self.viewer.set_model(self.model)