Пример #1
0
    def play(self, mocap_filepath):
        from mujoco_py import load_model_from_xml, MjSim, MjViewer

        curr_path = getcwd()
        xmlpath = '/mujoco/humanoid_deepmimic/envs/asset/dp_env_v2.xml'
        with open(curr_path + xmlpath) as fin:
            MODEL_XML = fin.read()

        model = load_model_from_xml(MODEL_XML)
        sim = MjSim(model)
        viewer = MjViewer(sim)

        self.read_raw_data(mocap_filepath)
        self.convert_raw_data()

        from time import sleep

        phase_offset = np.array([0.0, 0.0, 0.0])

        while True:
            for k in range(len(self.data)):
                tmp_val = self.data_config[k]
                sim_state = sim.get_state()
                sim_state.qpos[:] = tmp_val[:]
                sim_state.qpos[:3] += phase_offset[:]
                sim.set_state(sim_state)
                sim.forward()
                viewer.render()

            sim_state = sim.get_state()
            phase_offset = sim_state.qpos[:3]
            phase_offset[2] = 0
Пример #2
0
class MujocoSimulation(Simulation):
    def __init__(self, name, total_time=1000, recording_time=[0, 1000]):
        super(MujocoSimulation, self).__init__(name,
                                               total_time=total_time,
                                               recording_time=recording_time)
        self.model = load_model_from_path('mujoco_xmls/' + name + '.xml')
        self.sim = MjSim(self.model)
        self.initial_state = self.sim.get_state()
        self.input_size = len(self.sim.data.ctrl)
        self.reset()
        self.viewer = None

    def reset(self):
        super(MujocoSimulation, self).reset()
        self.sim.set_state(self.initial_state)

    def run(self, reset=True):
        if reset:
            self.reset()
        self.sim.set_state(self.initial_state)
        for i in range(self.total_time):
            self.sim.data.ctrl[:] = self.ctrl_array[i]
            self.sim.step()
            self.trajectory.append(self.sim.get_state())
        self.alreadyRun = True

    def get_trajectory(self, all_info=True):
        if not self.alreadyRun:
            self.run()
        if all_info:
            return self.trajectory.copy()
        else:
            return [x.qpos for x in self.trajectory]

    def get_recording(self, all_info=True):
        traj = self.get_trajectory(all_info=all_info)
        return traj[self.recording_time[0]:self.recording_time[1]]

    def watch(self, repeat_count=4):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        for _ in range(repeat_count):
            self.sim.set_state(self.initial_state)
            for i in range(self.total_time):
                self.sim.data.ctrl[:] = self.ctrl_array[i]
                self.sim.step()
                self.viewer.render()
        self.run(
            reset=False
        )  # so that the trajectory will be compatible with what user watches
Пример #3
0
def find_contact_height(sim: mj.MjSim, iterations: int = 10) -> float:
    """ Find the height we should put our walker at so we are just barely in contact with the ground.

        Runs a binary search on height (assumed to be state[1][2]) such that the body with the minimum z coordinate
        on the ground is just barely in contact with the ground.


        Args:
            sim: the instantiated sim MjSim object you want to find the correct height for
            iterations: number of times to run_util the binary search
        Returns:
            just the height as a float, you'll need to set it yourself

    """

    state = sim.get_state()
    height_guess = state[1][1]
    step = height_guess * 2  # upper range for how much we will search for

    for _ in range(iterations):
        if sim.data.ncon:  # ncon is the number of collisions
            height_guess += step
        else:
            height_guess -= step

        state[1][1] = height_guess
        sim.set_state(state)
        sim.forward()
        step /= 2

    return height_guess
Пример #4
0
def _setup_render_rgb(sim: mujoco_py.MjSim) -> mujoco_py.MjSim:
  # create copy of simulation to customize rendering context
  # flags defined in mjvisualize.h
  render_sim = mujoco_py.MjSim(sim.model)
  render_sim.set_state(sim.get_state())
  render_ctx = mujoco_py.MjRenderContextOffscreen(render_sim)
  render_ctx.scn.stereo = 2 # side-by-side rendering
  return render_sim
Пример #5
0
def run_activations_fcn(est_activations,
                        model_ver=0,
                        timestep=0.005,
                        Mj_render=False):
    #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! the q0 is now the chasis pos. needs to be fixed
    """
	this function runs the predicted activations generatred from running
	the inverse map on the desired task kinematics
	"""

    #loading data
    #print("loading data")
    #task_kinematics=np.load("task_kinematics.npy")
    #est_task_activations=np.load("est_task_activations.npy")

    model = load_model_from_path(
        "./models/nmi_leg_w_chassis_v{}.xml".format(model_ver))
    sim = MjSim(model)
    if Mj_render:
        viewer = MjViewer(sim)
        # viewer.cam.fixedcamid += 1
        # viewer.cam.type = const.CAMERA_FIXED
    sim_state = sim.get_state()
    control_vector_length = sim.data.ctrl.__len__()
    print("control_vector_length: " + str(control_vector_length))

    number_of_task_samples = est_activations.shape[0]

    real_attempt_positions = np.zeros((number_of_task_samples, 2))
    real_attempt_activations = np.zeros((number_of_task_samples, 3))
    chassis_pos = np.zeros(number_of_task_samples, )
    sim.set_state(sim_state)
    for ii in range(number_of_task_samples):
        sim.data.ctrl[:] = est_activations[ii, :]
        sim.step()
        current_positions_array = sim.data.qpos[-2:]

        # current_kinematics_array=np.array(
        # 	[sim.data.qpos[0],
        # 	sim.data.qvel[0],
        # 	sim.data.qacc[0],
        # 	sim.data.qpos[1],
        # 	sim.data.qvel[1],
        # 	sim.data.qacc[1]]
        # 	)
        chassis_pos[ii] = sim.data.get_geom_xpos("Chassis_frame")[0]
        real_attempt_positions[ii, :] = current_positions_array
        real_attempt_activations[ii, :] = sim.data.ctrl
        if Mj_render:
            viewer.render()
    real_attempt_kinematics = positions_to_kinematics_fcn(
        real_attempt_positions[:, 0],
        real_attempt_positions[:, 1],
        timestep=0.005)
    return real_attempt_kinematics, real_attempt_activations, chassis_pos
Пример #6
0
def test_dampingControl():
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    timestep = generatePatternedTrajectories.TIMESTEP
    control_freq = 1/timestep
    total_time = 2
    num_cycles = int(total_time * control_freq)
    plt.ion()
    LW = 1.0
    fig = plt.figure(figsize=(4,15))
    axes = []
    lines = []
    goals = []
    for i in range(7):
        axes.append(fig.add_subplot(7,1,i+1))
        lines.append(axes[i].plot([],[],'b-', lw=LW)[0])
        goals.append(axes[i].plot([],[],'r-', lw=LW)[0])
        axes[i].set_ylim([-1, 1])
        axes[i].set_xlim([0,total_time])
        axes[i].set_ylabel("Angle{}(rad)".format(i), fontsize=8)
        axes[i].set_xlabel("Time(s)", fontsize=8)

    for test in range(5):
        q_init = bounds.getRandPosInBounds()
        qd_goal = np.zeros(7)
        qd_init = np.random.rand(7)
        for g in range(7):
            goals[g].set_ydata(np.ones(num_cycles) * qd_goal[g])
            goals[g].set_xdata(np.linspace(0,3,num_cycles))
        sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={}))
        sim.step()
        sim_time = 0
        for i in range(num_cycles):
            state = sim.get_state()
            q = state[1]
            qd = state[2]
            sim.data.ctrl[:] = controllers.dampingControl(qd=qd)
            sim.step()
            viewer.render()
            if i % 35 == 0:
                for a in range(7):
                    lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time))
                    lines[a].set_ydata(np.append(lines[a].get_ydata(), qd[a]))
                fig.canvas.draw()
                fig.canvas.flush_events()
            sim_time += timestep
            if bounds.outOfBounds(q):
                break
        for i in range(7):
            lines[i].set_xdata([])
            lines[i].set_ydata([])
        time.sleep(1)
Пример #7
0
def run_activations_fcn(MuJoCo_model_name, est_activations, timestep=0.01, Mj_render=False):
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! the q0 is now the chasis pos. needs to be fixed
	"""
	this function runs the predicted activations generatred from running
	the inverse map on the desired task kinematics
	"""

	#loading data
	#print("loading data")
	#task_kinematics=np.load("task_kinematics.npy")
	#est_task_activations=np.load("est_task_activations.npy")

	MuJoCo_model = load_model_from_path("./models/"+MuJoCo_model_name)
	sim = MjSim(MuJoCo_model)
	if Mj_render:
		viewer = MjViewer(sim)
		# to move it to the mounted camera
		viewer.cam.fixedcamid += 1
		viewer.cam.type = const.CAMERA_FIXED
		# # to record the video
		# viewer._record_video = True
		# # recording path
		# viewer._video_path = "~/Documents/"+str(time.localtime()[3])+str(time.localtime()[4])+str(time.localtime()[5])
	sim_state = sim.get_state()
	control_vector_length=sim.data.ctrl.__len__()
	#print("control_vector_length: "+str(control_vector_length))

	number_of_task_samples=est_activations.shape[0]

	real_attempt_positions = np.zeros((number_of_task_samples,2))
	real_attempt_activations = np.zeros((number_of_task_samples,3))
	chassis_pos=np.zeros(number_of_task_samples,)
	sim.set_state(sim_state)
	for ii in range(number_of_task_samples):
	    sim.data.ctrl[:] = est_activations[ii,:]
	    sim.step()
	    current_positions_array = sim.data.qpos[-2:]
	    # current_kinematics_array=np.array(
	    # 	[sim.data.qpos[0],
	    # 	sim.data.qvel[0],
	    # 	sim.data.qacc[0],
	    # 	sim.data.qpos[1],
	    # 	sim.data.qvel[1],
	    # 	sim.data.qacc[1]]
	    # 	)
	    chassis_pos[ii]=sim.data.get_geom_xpos("Chassis_frame")[0]
	    real_attempt_positions[ii,:] = current_positions_array
	    real_attempt_activations[ii,:] = sim.data.ctrl
	    if Mj_render:
	    	viewer.render()
	real_attempt_kinematics = positions_to_kinematics_fcn(
		real_attempt_positions[:,0], real_attempt_positions[:,1], timestep = 0.01)
	return real_attempt_kinematics, real_attempt_activations, chassis_pos
Пример #8
0
def _setup_render_depth(sim: mujoco_py.MjSim) -> mujoco_py.MjSim:
  # create copy of simulation to customize rendering context
  # flags defined in mjvisualize.h
  render_sim = mujoco_py.MjSim(sim.model)
  render_sim.set_state(sim.get_state())
  render_ctx = mujoco_py.MjRenderContextOffscreen(render_sim)
  render_ctx.vopt.flags[1] = 0 # textures off
  render_ctx.scn.flags[0] = 0 # shadow off
  render_ctx.scn.flags[2] = 0 # reflection off
  render_ctx.scn.flags[4] = 0 # skybox off
  render_ctx.scn.stereo = 2 # side-by-side rendering
  return render_sim
Пример #9
0
class MyEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    def __init__(self):
        self.model = load_model_from_path("xmls/Tesrt.xml")
        self.sim = MjSim(self.model)
        self.viewer = None
        self.sim_state = self.sim.get_state()
        self.bodynames = [
            'torso1', 'head', 'uwaist', 'lwaist', 'butt',
            'right_thigh1', 'right_shin1', 'right_foot_cap1', 'right_foot_cap2', 'left_thigh1',
            'left_shin1', 'left_foot_cap1', 'left_foot_cap2', 'right_uarm1', 'right_larm',
            'right_hand', 'left_uarm1', 'left_larm', 'left_hand'
        ]

        ones_act = np.ones(len(self.sim.data.ctrl))
        ones_obs = np.ones(self._get_state().shape[0])

        self.action_space = spaces.Box(-ones_act, ones_act)
        self.observation_space = spaces.Box(-ones_obs, ones_obs)

    def _get_state(self):
        torso = []
        ret = []
        for i in range(len(self.bodynames)):
            vec = self.sim.data.get_geom_xpos(self.bodynames[i])
            if i==0:
                ret = vec
                torso = vec
            if i!=0:
                ret = np.append(ret, vec-torso)
        return ret
    
    def _get_reward(self):
        return self.sim.data.get_geom_xpos('head')[2]

    def _step(self, action):
        for i in range(len(action)):
            self.sim.data.ctrl[i] = action[i] * 0.5
        
        self.sim.step()
        self.sim.step()
        return self._get_state(),self._get_reward(),False,{}

    def _reset(self):
        self.sim.set_state(self.sim_state)
        return self._get_state()

    def _render(self, mode = 'human', close = False):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()
Пример #10
0
def test_cycle_through_orientations():
    panda_kinematics = ikpy_panda_kinematics.panda_kinematics()
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    x_target, y_target, z_target = 0.2, 0.0, 0.5
    step1 = np.linspace(0,np.pi/2, 100)
    step2 = np.linspace(np.pi/2, -np.pi/2, 200)
    step3 = np.linspace(-np.pi/2, 0, 100)
    sweep = np.concatenate((step1, step2, step3))
    roll = 0
    pitch = 0
    yaw = np.pi


    ee_goal = [x_target, y_target, z_target, roll, pitch, yaw]
    qinit = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6])

    sim.set_state(MjSimState(time=0,qpos=qinit,qvel=np.zeros(7),act=None,udd_state={}))
    sim.step()

    qgoal = qinit
    q = qinit
    qd = np.zeros(7)
    count = -1
    num_steps_per_change = 4
    for i in range(num_steps_per_change * 400):
        if i % num_steps_per_change == 0:
            count += 1
            ee_goal = [x_target, y_target, z_target, roll, pitch, sweep[count] + yaw]
            qgoal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q)
            R1 = panda_kinematics.euler_angles_to_rpy_rotation_matrix(rpy=[roll, pitch, sweep[count] + yaw])
            R2 = panda_kinematics.euler_angles_to_rpy_rotation_matrix(panda_kinematics.forward_kinematics(qgoal)[3:6])
            # R3 = sim.data.body_xmat[sim.model.body_name2id("right_hand")].reshape(3, 3)
            print("R1:\n", R1)
            print("R2:\n", R2)
            # print("R3:\n", R3)
            print("EE:", np.around(ee_goal, 3))
            print("q: ", np.around(qgoal, 3))

        state = sim.get_state()
        q = state[1]
        qd = state[2]
        sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=qgoal)
        sim.step()
        viewer.render()

    time.sleep(1)
Пример #11
0
class Mujoco_Renderer():
    def __init__(self, mujoco_xml, hp):
        from mujoco_py import load_model_from_path, MjSim
        mujoco_xml = '/'.join(str.split(gcp.__file__, '/')[:-1]) + '/' + mujoco_xml
        self.sim = MjSim(load_model_from_path(mujoco_xml))
        self._hp = hp

    def render(self, qpos):
        sim_state = self.sim.get_state()
        sim_state.qpos[:2] = qpos
        sim_state.qvel[:] = np.zeros_like(self.sim.data.qvel)
        self.sim.set_state(sim_state)
        self.sim.forward()
        
        subgoal_image = self.sim.render(self._hp.mpar.img_sz, self._hp.mpar.img_sz, camera_name='maincam')
        # plt.imshow(subgoal_image)
        # plt.savefig('test.png')
        return subgoal_image
Пример #12
0
class ArmTaskEnv:
    """
    States:
        sensors
    Actions:
        actuators
    """
    def __init__(self, model_file):
        # Load model to MuJoCo
        self.model_file = model_file
        self.mj_model = load_model_from_path(model_file)
        self.sim = MjSim(self.mj_model)
        #self.sim = mujoco.Physics.from_xml_path(model_file)

        # Parse state and action spaces
        self.state_dim = len(self.sim.data.sensordata)
        self.action_dim = len(self.sim.data.ctrl)

        # Rendering
        self.viewer = None

    def reset(self):
        self.sim.reset()

    def step(self, action):
        # Assign action to actuators
        self.sim.data.ctrl[:] = action.copy()

        # Simulate step
        self.sim.step()

        # Get next state
        next_state = self.sim.data.sensordata.copy()

        return next_state

    def render(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()

    @property
    def time(self):
        return self.sim.get_state().time
Пример #13
0
    def __init__(self, sparse_reward=True, horizon=250):
        #print(horizon)
        self.sparse_reward = sparse_reward
        model = load_model_from_path("difficult-point.xml")
        sim = MjSim(model)
        self.sim = sim
        viewer = MjViewer(sim)
        self.init_state = sim.get_state()
        self.model = model
        self.viewer = viewer
        self.action_dim = len(self.sim.data.ctrl)
        self.obs_dim = len(self.get_obs())

        high = np.array([np.inf] * self.obs_dim)
        # self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([2.0] * self.action_dim)
        self.action_space = spaces.Box(-high, high, dtype=np.float32)
        self.metadata = None
        self.horizon = horizon
        self.cur_step = 0
Пример #14
0
def mp_test_states():
    sim = MjSim(load_model_from_xml(BASIC_MODEL_XML))

    states = []
    for val in range(3):
        sim.data.qpos[:3] = val * 0.1
        states.append(sim.get_state())

    pool = MjRenderPool(sim.model, n_workers=3)

    images = pool.render(100, 100, states=states[:2])
    assert images.shape == (2, 100, 100, 3)
    compare_imgs(images[0], 'test_render_pool.mp_test_states.1.png')
    compare_imgs(images[1], 'test_render_pool.mp_test_states.2.png')

    states = list(reversed(states))
    images = pool.render(100, 100, states=states)
    assert images.shape == (3, 100, 100, 3)
    compare_imgs(images[0], 'test_render_pool.mp_test_states.3.png')
    compare_imgs(images[1], 'test_render_pool.mp_test_states.4.png')
    compare_imgs(images[2], 'test_render_pool.mp_test_states.5.png')
Пример #15
0
class FastResetMujocoEnv(MujocoEnv):
    """Only loads the mujoco XML file once to allow for quicker resets."""
    def _reset_internal(self):
        """Resets simulation internal configurations."""
        # instantiate simulation from MJCF model
        self._load_model()
        if not hasattr(self, "mjpy_model"):
            self.mjpy_model = self.model.get_model(mode="mujoco_py")
            self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0
            )
            self.viewer.viewer.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0
            )
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0
            )

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False
Пример #16
0
class Actor(Environment):
    def __init__(self, model_path, epsilon, epsilon_min, epsilon_decay,
                 max_steps):
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.model3D = load_model_from_path(model_path)
        self.sim = MjSim(self.model3D)
        self.q_network = None
        self.max_steps = max_steps
        Environment.__init__(self, self.sim)

    def load_model(self, path):
        self.q_network = load_model(path)

    def get_possible_actions(self):
        return np.array([[-1], [1]])

    def do_action(self, a):
        self.sim.data.ctrl[0:] = a

    def act(self, state):
        q_values = self.q_network.predict(state)
        if random.random() < self.epsilon:
            action = random.randint(0, len(q_values[0]) - 1)
        else:
            action = np.argmax(q_values)
        return action

    def observe(self, state):
        action = self.act(state)
        self.do_action(self.get_possible_actions()[action])
        self.sim.step()
        s = np.array([
            self.sim.get_state().qpos.tolist() +
            self.sim.get_state().qvel.tolist()
        ])
        r = self.get_reward()
        return s, action, r, self.is_done()
Пример #17
0
    def __init__(self, sparse_reward=False, horizon=700, fixed_reset=True):
        self.sparse_reward = sparse_reward
        model = load_model_from_path("envs/reacher.xml")
        sim = MjSim(model)
        self.sim = sim
        viewer = MjViewer(sim)
        self.init_state = sim.get_state()
        self.model = model
        self.viewer = viewer
        self.action_dim = len(self.sim.data.ctrl)
        #print(self.action_dim)
        self.obs_dim = len(self.get_obs())
        self.fixed_reset = fixed_reset

        high = np.array([np.inf] * self.obs_dim)
        # self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([2.0] * self.action_dim)
        self.action_space = spaces.Box(-high, high, dtype=np.float32)
        self.metadata = None
        self.horizon = horizon
        self.cur_step = 0
Пример #18
0
    def __init__(self, sparse_reward=False, horizon=200):
        self.goal_idx = 0  #np.random.randint(0, 4)
        self.goal_one_hot = np.array([0, 0, 0, 0])
        self.goal_one_hot[self.goal_idx] = 1
        self.sparse_reward = sparse_reward
        model = load_model_from_path("envs/ReacherDistractor.xml")
        sim = MjSim(model)
        self.sim = sim
        viewer = MjViewer(sim)
        self.init_state = sim.get_state()
        self.model = model
        self.viewer = viewer
        self.action_dim = len(self.sim.data.ctrl)
        self.obs_dim = len(self.get_obs())

        high = np.array([np.inf] * self.obs_dim)
        # self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([2.0] * self.action_dim)
        self.action_space = spaces.Box(-high, high, dtype=np.float32)
        self.metadata = None
        self.horizon = horizon
        self.cur_step = 0
Пример #19
0
class MujocoEnv(metaclass=EnvMeta):
    """
    Initializes a Mujoco Environment.
    Args:
        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.
        has_offscreen_renderer (bool): True if using off-screen rendering.
        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse
        render_collision_mesh (bool): True if rendering collision meshes
            in camera. False otherwise.
        render_visual_mesh (bool): True if rendering visual meshes
            in camera. False otherwise.
        render_gpu_device_id (int): corresponds to the GPU device id to use for offscreen rendering.
            Defaults to -1, in which case the device will be inferred from environment variables
            (GPUS or CUDA_VISIBLE_DEVICES).
        control_freq (float): how many control signals to receive
            in every simulated second. This sets the amount of simulation time
            that passes between every action input.
        horizon (int): Every episode lasts for exactly @horizon timesteps.
        ignore_done (bool): True if never terminating the environment (ignore @horizon).
        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables
        renderer (str): string for the renderer to use
        renderer_config (dict): dictionary for the renderer configurations
    Raises:
        ValueError: [Invalid renderer selection]
    """

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        render_gpu_device_id=-1,
        control_freq=20,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        renderer="mujoco",
        renderer_config=None,
    ):
        # First, verify that both the on- and off-screen renderers are not being used simultaneously
        if has_renderer is True and has_offscreen_renderer is True:
            raise ValueError("the onscreen and offscreen renderers cannot be used simultaneously.")

        # Rendering-specific attributes
        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_camera = render_camera
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.render_gpu_device_id = render_gpu_device_id
        self.viewer = None

        # Simulation-specific attributes
        self._observables = {}                      # Maps observable names to observable objects
        self._obs_cache = {}                        # Maps observable names to pre-/partially-computed observable values
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.hard_reset = hard_reset
        self._model_postprocessor = None            # Function to post-process model after load_model() call
        self.model = None
        self.cur_time = None
        self.model_timestep = None
        self.control_timestep = None
        self.deterministic_reset = False            # Whether to add randomized resetting of objects / robot joints

        self.renderer = renderer
        self.renderer_config = renderer_config

        # Load the model
        self._load_model()

        # Post-process model
        self._postprocess_model()

        # Initialize the simulation
        self._initialize_sim()

        #initializes the rendering
        self.initialize_renderer()

        # Run all further internal (re-)initialization required
        self._reset_internal()

        # Load observables
        if hasattr(self.viewer, '_setup_observables'):
            self._observables = self.viewer._setup_observables()
        else:
            self._observables = self._setup_observables()

        # check if viewer has get observations method and set a flag for future use.
        self.viewer_get_obs = hasattr(self.viewer, '_get_observations')

    def initialize_renderer(self):
        self.renderer = self.renderer.lower()

        if self.renderer_config is None and self.renderer != 'mujoco':
            self.renderer_config = load_renderer_config(self.renderer)

        if self.renderer == 'mujoco' or self.renderer == 'default':
            pass
        elif self.renderer == 'nvisii':
            from robosuite.renderers.nvisii.nvisii_renderer import NVISIIRenderer

            self.viewer = NVISIIRenderer(env=self,
                                         **self.renderer_config)
        elif self.renderer == 'igibson':
            from robosuite.renderers.igibson.igibson_renderer import iGibsonRenderer

            self.viewer = iGibsonRenderer(env=self,
                                         **self.renderer_config
                                         )
        else:
            raise ValueError(f'{self.renderer} is not a valid renderer name. Valid options include default (native mujoco renderer), nvisii, and igibson')

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.
        Args:
            control_freq (float): Hz rate to run control loop at within the simulation
        """
        self.cur_time = 0
        self.model_timestep = macros.SIMULATION_TIMESTEP
        if self.model_timestep <= 0:
            raise ValueError("Invalid simulation timestep defined!")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError("Control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq

    def set_model_postprocessor(self, postprocessor):
        """
        Sets the post-processor function that self.model will be passed to after load_model() is called during resets.
        Args:
            postprocessor (None or function): If set, postprocessing method should take in a Task-based instance and
                return no arguments.
        """
        self._model_postprocessor = postprocessor

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _postprocess_model(self):
        """
        Post-processes model after load_model() call. Useful for external objects (e.g.: wrappers) to
        be able to modify the sim model before it is actually loaded into the simulation
        """
        if self._model_postprocessor is not None:
            self._model_postprocessor(self.model)

    def _setup_references(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        # Setup mappings from model to IDs
        self.model.generate_id_mappings(sim=self.sim)

    def _setup_observables(self):
        """
        Sets up observables to be used for this environment.
        Returns:
            OrderedDict: Dictionary mapping observable names to its corresponding Observable object
        """
        return OrderedDict()

    def _initialize_sim(self, xml_string=None):
        """
        Creates a MjSim object and stores it in self.sim. If @xml_string is specified, the MjSim object will be created
        from the specified xml_string. Else, it will pull from self.model to instantiate the simulation
        Args:
            xml_string (str): If specified, creates MjSim object from this filepath
        """
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = load_model_from_xml(xml_string) if xml_string else self.model.get_model(mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure changes have propagated through sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.forward()

        # Setup sim time based on control frequency
        self.initialize_time(self.control_freq)

    def reset(self):
        """
        Resets simulation.
        Returns:
            OrderedDict: Environment observation space after reset occurs
        """
        # TODO(yukez): investigate black screen of death
        # Use hard reset if requested

        if self.hard_reset and not self.deterministic_reset:
            if self.renderer == 'mujoco' or self.renderer == 'default':
                self._destroy_viewer()
            self._load_model()
            self._postprocess_model()
            self._initialize_sim()
        # Else, we only reset the sim internally
        else:
            self.sim.reset()
        
        # Reset necessary robosuite-centric variables
        self._reset_internal()
        self.sim.forward()
        # Setup observables, reloading if
        self._obs_cache = {}
        if self.hard_reset:
            # If we're using hard reset, must re-update sensor object references
            _observables = self._setup_observables()
            for obs_name, obs in _observables.items():
                self.modify_observable(observable_name=obs_name, attribute="sensor", modifier=obs._sensor)
        # Make sure that all sites are toggled OFF by default
        self.visualize(vis_settings={vis: False for vis in self._visualizations})

        if self.viewer is not None and self.renderer != 'mujoco':
            self.viewer.reset()
        
        observations = self.viewer._get_observations(force_update=True) if self.viewer_get_obs else self._get_observations(force_update=True)

        # Return new observations
        return observations

    def _reset_internal(self):
        """Resets simulation internal configurations."""

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
            self.viewer.viewer.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

            # make sure mujoco-py doesn't block rendering frames
            # (see https://github.com/StanfordVL/robosuite/issues/39)
            self.viewer.viewer._render_every_frame = True

            # Set the camera angle for viewing
            if self.render_camera is not None:
                self.viewer.set_camera(camera_id=self.sim.model.camera_name2id(self.render_camera))

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim, device_id=self.render_gpu_device_id)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._setup_references()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

        # Empty observation cache and reset all observables
        self._obs_cache = {}
        for observable in self._observables.values():
            observable.reset()

    def _update_observables(self, force=False):
        """
        Updates all observables in this environment
        Args:
            force (bool): If True, will force all the observables to update their internal values to the newest
                value. This is useful if, e.g., you want to grab observations when directly setting simulation states
                without actually stepping the simulation.
        """
        for observable in self._observables.values():
            observable.update(timestep=self.model_timestep, obs_cache=self._obs_cache, force=force)

    def _get_observations(self, force_update=False):
        """
        Grabs observations from the environment.
        Args:
            force_update (bool): If True, will force all the observables to update their internal values to the newest
                value. This is useful if, e.g., you want to grab observations when directly setting simulation states
                without actually stepping the simulation.
        Returns:
            OrderedDict: OrderedDict containing observations [(name_string, np.array), ...]
        """
        observations = OrderedDict()
        obs_by_modality = OrderedDict()

        # Force an update if requested
        if force_update:
            self._update_observables(force=True)

        # Loop through all observables and grab their current observation
        for obs_name, observable in self._observables.items():
            if observable.is_enabled() and observable.is_active():
                obs = observable.obs
                observations[obs_name] = obs
                modality = observable.modality + "-state"
                if modality not in obs_by_modality:
                    obs_by_modality[modality] = []
                # Make sure all observations are numpy arrays so we can concatenate them
                array_obs = [obs] if type(obs) in {int, float} or not obs.shape else obs
                obs_by_modality[modality].append(np.array(array_obs))

        # Add in modality observations
        for modality, obs in obs_by_modality.items():
            # To save memory, we only concatenate the image observations if explicitly requested
            if modality == "image-state" and not macros.CONCATENATE_IMAGES:
                continue
            observations[modality] = np.concatenate(obs, axis=-1)

        return observations

    def step(self, action):
        """
        Takes a step in simulation with control command @action.
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            4-tuple:
                - (OrderedDict) observations from the environment
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) misc information
        Raises:
            ValueError: [Steps past episode termination]
        """
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1

        # Since the env.step frequency is slower than the mjsim timestep frequency, the internal controller will output
        # multiple torque commands in between new high level action commands. Therefore, we need to denote via
        # 'policy_step' whether the current step we're taking is simply an internal update of the controller,
        # or an actual policy update
        policy_step = True

        # Loop through the simulation at the model timestep rate until we're ready to take the next policy step
        # (as defined by the control frequency specified at the environment level)
        for i in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self._pre_action(action, policy_step)
            self.sim.step()
            self._update_observables()
            policy_step = False

        # Note: this is done all at once to avoid floating point inaccuracies
        self.cur_time += self.control_timestep

        reward, done, info = self._post_action(action)

        if self.viewer is not None and self.renderer != 'mujoco':
            self.viewer.update()

        observations = self.viewer._get_observations() if self.viewer_get_obs else self._get_observations()
        return observations, reward, done, info

    def _pre_action(self, action, policy_step=False):
        """
        Do any preprocessing before taking an action.
        Args:
            action (np.array): Action to execute within the environment
            policy_step (bool): Whether this current loop is an actual policy step or internal sim update step
        """
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """
        Do any housekeeping after taking an action.
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            3-tuple:
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) empty dict to be filled with information by subclassed method
        """
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done

        return reward, self.done, {}

    def reward(self, action):
        """
        Reward should be a function of state and action
        Args:
            action (np.array): Action to execute within the environment
        Returns:
            float: Reward from environment
        """
        raise NotImplementedError

    def render(self):
        """
        Renders to an on-screen window.
        """
        self.viewer.render()

    def get_pixel_obs(self):
        """
        Gets the pixel observations for the environment from the specified renderer
        """
        self.viewer.get_pixel_obs()

    def close_renderer(self):
        """
        Closes the renderer
        """
        self.viewer.close()

    def observation_spec(self):
        """
        Returns an observation as observation specification.
        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.
        Returns:
            OrderedDict: Observations from the environment
        """
        observation = self.viewer._get_observations() if self.viewer_get_obs else self._get_observations()
        return observation

    def clear_objects(self, object_names):
        """
        Clears objects with the name @object_names out of the task space. This is useful
        for supporting task modes with single types of objects, as in
        @self.single_object_mode without changing the model definition.
        Args:
            object_names (str or list of str): Name of object(s) to remove from the task workspace
        """
        object_names = {object_names} if type(object_names) is str else set(object_names)
        for obj in self.model.mujoco_objects:
            if obj.name in object_names:
                self.sim.data.set_joint_qpos(obj.joints[0], np.array((10, 10, 10, 1, 0, 0, 0)))

    def visualize(self, vis_settings):
        """
        Do any needed visualization here
        Args:
            vis_settings (dict): Visualization keywords mapped to T/F, determining whether that specific
                component should be visualized. Should have "env" keyword as well as any other relevant
                options specified.
        """
        # Set visuals for environment objects
        for obj in self.model.mujoco_objects:
            obj.set_sites_visibility(sim=self.sim, visible=vis_settings["env"])

    def set_camera_pos_quat(self, camera_pos, camera_quat):
        if self.renderer in ["nvisii", "igibson"]:
            self.viewer.set_camera_pos_quat(camera_pos, camera_quat)
        else:
            raise AttributeError('setting camera position and quat requires renderer to be either NVISII or iGibson.')

    def reset_from_xml_string(self, xml_string):
        """
        Reloads the environment from an XML description of the environment.
        Args:
            xml_string (str): Filepath to the xml file that will be loaded directly into the sim
        """

        # if there is an active viewer window, destroy it
        if self.renderer != 'nvisii':
            self.close()

        # Since we are reloading from an xml_string, we are deterministically resetting
        self.deterministic_reset = True

        # initialize sim from xml
        self._initialize_sim(xml_string=xml_string)

        # Now reset as normal
        self.reset()

        # Turn off deterministic reset
        self.deterministic_reset = False

    def check_contact(self, geoms_1, geoms_2=None):
        """
        Finds contact between two geom groups.
        Args:
            geoms_1 (str or list of str or MujocoModel): an individual geom name or list of geom names or a model. If
                a MujocoModel is specified, the geoms checked will be its contact_geoms
            geoms_2 (str or list of str or MujocoModel or None): another individual geom name or list of geom names.
                If a MujocoModel is specified, the geoms checked will be its contact_geoms. If None, will check
                any collision with @geoms_1 to any other geom in the environment
        Returns:
            bool: True if any geom in @geoms_1 is in contact with any geom in @geoms_2.
        """
        # Check if either geoms_1 or geoms_2 is a string, convert to list if so
        if type(geoms_1) is str:
            geoms_1 = [geoms_1]
        elif isinstance(geoms_1, MujocoModel):
            geoms_1 = geoms_1.contact_geoms
        if type(geoms_2) is str:
            geoms_2 = [geoms_2]
        elif isinstance(geoms_2, MujocoModel):
            geoms_2 = geoms_2.contact_geoms
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2 if geoms_2 is not None else True
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2 if geoms_2 is not None else True
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                return True
        return False

    def get_contacts(self, model):
        """
        Checks for any contacts with @model (as defined by @model's contact_geoms) and returns the set of
        geom names currently in contact with that model (excluding the geoms that are part of the model itself).
        Args:
            model (MujocoModel): Model to check contacts for.
        Returns:
            set: Unique geoms that are actively in contact with this model.
        Raises:
            AssertionError: [Invalid input type]
        """
        # Make sure model is MujocoModel type
        assert isinstance(model, MujocoModel), \
            "Inputted model must be of type MujocoModel; got type {} instead!".format(type(model))
        contact_set = set()
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # check contact geom in geoms; add to contact set if match is found
            g1, g2 = self.sim.model.geom_id2name(contact.geom1), self.sim.model.geom_id2name(contact.geom2)
            if g1 in model.contact_geoms and g2 not in model.contact_geoms:
                contact_set.add(g2)
            elif g2 in model.contact_geoms and g1 not in model.contact_geoms:
                contact_set.add(g1)
        return contact_set

    def add_observable(self, observable):
        """
        Adds an observable to this environment.
        Args:
            observable (Observable): Observable instance.
        """
        assert observable.name not in self._observables,\
            "Observable name {} is already associated with an existing observable! Use modify_observable(...) " \
            "to modify a pre-existing observable.".format(observable.name)
        self._observables[observable.name] = observable

    def modify_observable(self, observable_name, attribute, modifier):
        """
        Modifies observable with associated name @observable_name, replacing the given @attribute with @modifier.
        Args:
             observable_name (str): Observable to modify
             attribute (str): Observable attribute to modify.
                Options are {`'sensor'`, `'corrupter'`,`'filter'`,  `'delayer'`, `'sampling_rate'`,
                `'enabled'`, `'active'`}
             modifier (any): New function / value to replace with for observable. If a function, new signature should
                match the function being replaced.
        """
        # Find the observable
        assert observable_name in self._observables, "No valid observable with name {} found. Options are: {}".\
            format(observable_name, self.observation_names)
        obs = self._observables[observable_name]
        # replace attribute accordingly
        if attribute == "sensor":
            obs.set_sensor(modifier)
        elif attribute == "corrupter":
            obs.set_corrupter(modifier)
        elif attribute == "filter":
            obs.set_filter(modifier)
        elif attribute == "delayer":
            obs.set_delayer(modifier)
        elif attribute == "sampling_rate":
            obs.set_sampling_rate(modifier)
        elif attribute == "enabled":
            obs.set_enabled(modifier)
        elif attribute == "active":
            obs.set_active(modifier)
        else:
            # Invalid attribute specified
            raise ValueError("Invalid observable attribute specified. Requested: {}, valid options are {}".
                             format(attribute, {"sensor", "corrupter", "filter", "delayer",
                                                "sampling_rate", "enabled", "active"}))

    def _check_success(self):
        """
        Checks if the task has been completed. Should be implemented by subclasses
        Returns:
            bool: True if the task has been completed
        """
        raise NotImplementedError

    def _destroy_viewer(self):
        """
        Destroys the current mujoco renderer instance if it exists
        """
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()

    @property
    def observation_modalities(self):
        """
        Modalities for this environment's observations
        Returns:
            set: All observation modalities
        """
        return set([observable.modality for observable in self._observables.values()])

    @property
    def observation_names(self):
        """
        Grabs all names for this environment's observables
        Returns:
            set: All observation names
        """
        return set(self._observables.keys())

    @property
    def enabled_observables(self):
        """
        Grabs all names of enabled observables for this environment. An observable is considered enabled if its values
        are being continually computed / updated at each simulation timestep.
        Returns:
            set: All enabled observation names
        """
        return set([name for name, observable in self._observables.items() if observable.is_enabled()])

    @property
    def active_observables(self):
        """
        Grabs all names of active observables for this environment. An observable is considered active if its value is
        being returned in the observation dict from _get_observations() call or from the step() call (assuming this
        observable is enabled).
        Returns:
            set: All active observation names
        """
        return set([name for name, observable in self._observables.items() if observable.is_active()])

    @property
    def _visualizations(self):
        """
        Visualization keywords for this environment
        Returns:
            set: All components that can be individually visualized for this environment
        """
        return {"env"}

    @property
    def action_spec(self):
        """
        Action specification should be implemented in subclasses.
        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    @property
    def action_dim(self):
        """
        Size of the action space
        Returns:
            int: Action space dimension
        """
        raise NotImplementedError
Пример #20
0
def test_sim_state():
    model = load_model_from_xml(BASIC_MODEL_XML)

    foo = 10
    d = {"foo": foo,
         "foo_array": np.array([foo, foo, foo]),
         "foo_2darray": np.reshape(np.array([foo, foo, foo, foo]), (2, 2)),
         }

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)

    state = sim.get_state()
    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state.udd_state.keys():
        if (isinstance(state.udd_state[k], Number)):
            assert state.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state.udd_state[k], sim.udd_state[k])

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == (1 + sim.model.nq + sim.model.nv + sim.model.na + 8)

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state2.udd_state.keys():
        if (isinstance(state2.udd_state[k], Number)):
            assert state2.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state2.udd_state[k], sim.udd_state[k])

    assert state2 == state
    assert not state2 != state

    # test equality with deleting keys
    state2 = state2._replace(udd_state={"foo": foo})
    assert state2 != state
    assert not (state2 == state)

    # test equality with changing contents of array
    state2 = state2._replace(
        udd_state={"foo": foo, "foo_array": np.array([foo, foo + 1])})
    assert state2 != state
    assert not (state2 == state)

    # test equality with adding keys
    d2 = dict(d)
    d2.update({"not_foo": foo})
    state2 = state2._replace(udd_state=d2)
    assert state2 != state
    assert not (state2 == state)

    # test defensive copy
    sim.set_state(state)
    state.qpos[0] = -1
    assert not np.array_equal(state.qpos, sim.data.qpos)

    state3 = sim.get_state()
    state3.qpos[0] = -1
    assert not np.array_equal(state3.qpos, sim.data.qpos)
    state3.udd_state["foo_array"][0] = -1
    assert not np.array_equal(
        state3.udd_state["foo_array"], sim.udd_state["foo_array"])

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    state = sim.get_state()
    print("state.udd_state = %s" % state.udd_state)

    assert state.udd_state == {}

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == 1 + sim.model.nq + sim.model.nv + sim.model.na

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    assert state.udd_state == sim.udd_state
Пример #21
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self,
                 env_name,
                 rand_seed,
                 maximum_length,
                 model_path,
                 frame_skip,
                 misc_info={}):
        self._env_name = env_name
        self._rand_seed = rand_seed
        self._maximum_length = maximum_length
        self._misc_info = misc_info

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()

        # Why is this here? Causes errors
        #observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        #assert not done
        #self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        # annoying should replace
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed(self._rand_seed)

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(self._rand_seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def get_env_state(self):
        """
        Get full state of the environment beyond qpos and qvel
        For example, if targets are defined using sites, this function should also
        contain location of the sites (which are not included in qpos).
        Must return a dictionary that can be used in the set_env_state function
        """
        raise NotImplementedError

    def set_env_state(self, state):
        """
        Uses the state dictionary to set the state of the world
        """
        raise NotImplementedError

    # -----------------------------

    def reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob, 0, False, {}

    def reset_soft(self, seed=None):
        return self._old_obs, 0, False, {}

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='./tmp/',
                                   filename='newvid',
                                   it=0,
                                   camera_name=None):

        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o, *_ = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy(o)
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + str(it) + ".mp4"

            if not os.path.exists(save_loc):
                os.makedirs(save_loc)

            imageio.mimwrite(file_name, np.asarray(arrs), fps=10.0)
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
Пример #22
0
def test_sim_state():
    model = load_model_from_xml(BASIC_MODEL_XML)

    foo = 10
    d = {"foo": foo,
         "foo_array": np.array([foo, foo, foo]),
         "foo_2darray": np.reshape(np.array([foo, foo, foo, foo]), (2, 2)),
         }

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)

    state = sim.get_state()
    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state.udd_state.keys():
        if (isinstance(state.udd_state[k], Number)):
            assert state.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state.udd_state[k], sim.udd_state[k])

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == (1 + sim.model.nq + sim.model.nv + sim.model.na + 8)

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state2.udd_state.keys():
        if (isinstance(state2.udd_state[k], Number)):
            assert state2.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state2.udd_state[k], sim.udd_state[k])

    assert state2 == state
    assert not state2 != state

    # test equality with deleting keys
    state2 = state2._replace(udd_state={"foo": foo})
    assert state2 != state
    assert not (state2 == state)

    # test equality with changing contents of array
    state2 = state2._replace(
        udd_state={"foo": foo, "foo_array": np.array([foo, foo + 1])})
    assert state2 != state
    assert not (state2 == state)

    # test equality with adding keys
    d2 = dict(d)
    d2.update({"not_foo": foo})
    state2 = state2._replace(udd_state=d2)
    assert state2 != state
    assert not (state2 == state)

    # test defensive copy
    sim.set_state(state)
    state.qpos[0] = -1
    assert not np.array_equal(state.qpos, sim.data.qpos)

    state3 = sim.get_state()
    state3.qpos[0] = -1
    assert not np.array_equal(state3.qpos, sim.data.qpos)
    state3.udd_state["foo_array"][0] = -1
    assert not np.array_equal(
        state3.udd_state["foo_array"], sim.udd_state["foo_array"])

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    state = sim.get_state()
    print("state.udd_state = %s" % state.udd_state)

    assert state.udd_state == {}

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == 1 + sim.model.nq + sim.model.nv + sim.model.na

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    assert state.udd_state == sim.udd_state
Пример #23
0
def main(data_dir):
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    initial_state = MjSimState(time=0,
                               qpos=np.array([
                                   0, -np.pi / 4, 0, -3 * np.pi / 4 + 1, 0,
                                   np.pi / 2, np.pi / 4
                               ]),
                               qvel=np.zeros(7),
                               act=None,
                               udd_state={})
    sim.set_state(initial_state)
    sim.step()
    traj_count = 0
    control_state = None
    while (traj_count < NUM_TRAJECTORIES and control_state != FINISHED):
        control_state = ACCELERATING
        outputFile = None
        initial_q = sim.get_state()[q_INDEX]
        velGen = randVelGen.RandVelGenerator()
        qd_des = velGen.generatePatternVel()
        coming_back_time = 0.0
        time = 0
        while control_state != FINISHED:
            state = sim.get_state()
            q = state[q_INDEX]
            qd = state[qd_INDEX]
            boundViolations = bounds.getBoundViolations(q)
            # RD =
            TB = bounds.tableBoundViolation(sim)
            OB = bounds.outOfBounds(q)
            DA = helperFun.moving(qd)
            DC = helperFun.stopped(qd)
            DD = DC
            DB = coming_back_time > RETURN_TIME
            FN = traj_count >= NUM_TRAJECTORIES

            prev_state = control_state
            # transition block
            if control_state == ACCELERATING:
                if not TB and not OB and not DA:
                    control_state = ACCELERATING
                elif TB:
                    control_state = COMING_BACK_IN_BOUNDS
                    coming_back_time = 0.0
                elif not TB and OB:
                    control_state = DAMPING
                    curBoundViolations = bounds.getBoundViolations(q)
                    velGen.setJointDirections(curBoundViolations)
                elif not TB and not OB and DA:
                    control_state = COASTING
                    traj_count += 1
                    outputFile = open(data_dir +
                                      helperFun.getUniqueFileName("traj"),
                                      mode='x')
                    outputWriter = csv.writer(outputFile, delimiter=',')
                    print_count(traj_count)
                else:
                    control_state = FINISHED
                    print("Unknown transistion! ACCELERATING")

            elif control_state == COASTING:
                if not FN and not TB and not OB and DC:
                    control_state = ACCELERATING
                    qd_des = velGen.generatePatternVel()
                    outputFile.close()
                elif not FN and TB:
                    control_state = COMING_BACK_IN_BOUNDS
                    coming_back_time = 0
                    outputFile.close()
                elif not FN and not TB and OB:
                    control_state = DAMPING
                    outputFile.close()
                    curBoundViolations = bounds.getBoundViolations(q)
                    velGen.setJointDirections(curBoundViolations)
                elif FN:
                    control_state = FINISHED
                    outputFile.close()
                elif not FN and not TB and not OB and not DC:
                    control_state = COASTING
                else:
                    control_state = FINISHED
                    print("Unknown transition! COASTING")
                    outputFile.close()

            elif control_state == DAMPING:
                if not TB and not DD:
                    control_state = DAMPING
                elif TB:
                    control_state = COMING_BACK_IN_BOUNDS
                    coming_back_time = 0.0
                elif not TB and DD:
                    control_state = ACCELERATING
                    qd_des = velGen.generatePatternVel()
                else:
                    control_state = FINISHED
                    print("Unknow transition! DAMPING")

            elif control_state == COMING_BACK_IN_BOUNDS:
                if not DB:
                    control_state = COMING_BACK_IN_BOUNDS
                elif DB and OB:
                    control_state = DAMPING
                    curBoundViolations = bounds.getBoundViolations(q)
                    velGen.setJointDirections(curBoundViolations)
                elif DB and not OB:
                    control_state = ACCELERATING
                    qd_des = velGen.generatePatternVel()
                else:
                    control_state = FINISHED
                    print("Unknown transition! COMING_BACK_IN_BOUNDS")

            elif control_state == FINISHED:
                control_state = FINISHED

            else:
                control_state = FINISHED
                print("Got to an invalid state!")

            # debug states
            if prev_state != control_state:
                if control_state == ACCELERATING:
                    print("ACCELERATING")
                elif control_state == COASTING:
                    print("COASTING")
                elif control_state == DAMPING:
                    print("DAMPING")
                elif control_state == COMING_BACK_IN_BOUNDS:
                    print("COMING_BACK_IN_BOUNDS")
                elif control_state == "FINISHED":
                    print("FINISHED")
                else:
                    print("In a bad state!")

            torques = np.zeros(7)
            if control_state == ACCELERATING:
                torques = controllers.basicVelControl(qd_des=qd_des, qd_cur=qd)

            elif control_state == COASTING:
                data = np.concatenate(
                    (q, qd, data_calc.get_3D_data(sim), [time]))
                outputWriter.writerow(data)
                torques = controllers.basicVelControl(qd_des=qd_des, qd_cur=qd)

            elif control_state == DAMPING:
                torques = controllers.dampingControl(qd)

            elif control_state == COMING_BACK_IN_BOUNDS:
                coming_back_time += TIMESTEP
                torques = controllers.moveAwayFromTable(q=q, qd=qd)

            elif control_state == FINISHED:
                outputFile.close()
                break
            else:
                print("Got to an invalid state!")
                control_state = FINISHED
                break

            sim.data.ctrl[:] = torques
            sim.step()
            viewer.render()
            time += TIMESTEP
Пример #24
0
class MujocoEnv(metaclass=EnvMeta):
    """Initializes a Mujoco Environment."""

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        use_camera_obs=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            has_renderer (bool): If true, render the simulation state in 
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes 
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes 
                in camera. False otherwise.

            control_freq (float): how many control signals to receive 
                in every simulated second. This sets the amount of simulation time 
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            use_camera_obs (bool): if True, every observation includes a 
                rendered image.

            camera_name (str): name of camera to be rendered. Must be 
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.viewer = None
        self.model = None

        # settings for camera observations
        self.use_camera_obs = use_camera_obs
        if self.use_camera_obs and not self.has_offscreen_renderer:
            raise ValueError("Camera observations require an offscreen renderer.")
        self.camera_name = camera_name
        if self.use_camera_obs and self.camera_name is None:
            raise ValueError("Must specify camera name when using camera obs")
        self.camera_height = camera_height
        self.camera_width = camera_width
        self.camera_depth = camera_depth

        self._reset_internal()

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.
        """
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("xml model defined non-positive time step")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq)
            )
        self.control_timestep = 1. / control_freq

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def reset(self):
        """Resets simulation."""
        # TODO(yukez): investigate black screen of death
        # if there is an active viewer window, destroy it
        self._destroy_viewer()
        self._reset_internal()
        self.sim.forward()
        return self._get_observation()

    def _reset_internal(self):
        """Resets simulation internal configurations."""
        # instantiate simulation from MJCF model
        self._load_model()
        self.mjpy_model = self.model.get_model(mode="mujoco_py")
        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0
            )
            self.viewer.viewer.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0
            )
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0
            )

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """Returns an OrderedDict containing observations [(name_string, np.array), ...]."""
        return OrderedDict()

    def step(self, action):
        """Takes a step in simulation with control command @action."""
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1
        self._pre_action(action)
        end_time = self.cur_time + self.control_timestep
        while self.cur_time < end_time:
            self.sim.step()
            self.cur_time += self.model_timestep
        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action):
        """Do any preprocessing before taking an action."""
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """Do any housekeeping after taking an action."""
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done
        return reward, self.done, {}

    def reward(self, action):
        """Reward should be a function of state and action."""
        return 0

    def render(self):
        """
        Renders to an on-screen window.
        """
        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.
        """
        observation = self._get_observation()
        return observation

        # observation_spec = OrderedDict()
        # for k, v in observation.items():
        #     observation_spec[k] = v.shape
        # return observation_spec

    def action_spec(self):
        """
        Action specification should be implemented in subclasses.

        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """Reloads the environment from an XML description of the environment."""

        # if there is an active viewer window, destroy it
        self.close()

        # load model from xml
        self.mjpy_model = load_model_from_xml(xml_string)

        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0
            )
            self.viewer.viewer.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

        elif self.has_offscreen_renderer:
            render_context = MjRenderContextOffscreen(self.sim)
            render_context.vopt.geomgroup[0] = 1 if self.render_collision_mesh else 0
            render_context.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0
            self.sim.add_render_context(render_context)

        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

        # necessary to refresh MjData
        self.sim.forward()

    def find_contacts(self, geoms_1, geoms_2):
        """
        Finds contact between two geom groups.

        Args:
            geoms_1: a list of geom names (string)
            geoms_2: another list of geom names (string)

        Returns:
            iterator of all contacts between @geoms_1 and @geoms_2
        """
        for contact in self.sim.data.contact[0 : self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                yield contact

    def _check_contact(self):
        """Returns True if gripper is in contact with an object."""
        return False

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        return False

    def _destroy_viewer(self):
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
Пример #25
0
class MujocoEnv(gym.Env):
    def __init__(self,
                 model_path,
                 frame_skip=1,
                 action_noise=0.0,
                 random_init_state=True):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)

        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        self.init_qacc = self.data.qacc.ravel().copy()
        self.init_ctrl = self.data.ctrl.ravel().copy()

        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size

        self.action_noise = action_noise
        self.random_init_state = random_init_state
        """
        if "init_qpos" in self.model.numeric_names:
            init_qpos_id = self.model.numeric_names.index("init_qpos")
            addr = self.model.numeric_adr.flat[init_qpos_id]
            size = self.model.numeric_size.flat[init_qpos_id]
            init_qpos = self.model.numeric_data.flat[addr:addr + size]
            self.init_qpos = init_qpos

        """
        self.dcom = None
        self.current_com = None
        self.reset()
        super(MujocoEnv, self).__init__()

    @property
    def action_space(self):
        bounds = self.model.actuator_ctrlrange.copy()
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return spaces.Box(lb, ub)

    @property
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return spaces.Box(ub * -1, ub)

    @property
    def action_bounds(self):
        return self.action_space.low, self.action_space.high

    def reset_mujoco(self, init_state=None):
        if init_state is None:
            if self.random_init_state:
                qp = self.init_qpos.copy() + \
                     np.random.normal(size=self.init_qpos.shape) * 0.01
                qv = self.init_qvel.copy() + \
                     np.random.normal(size=self.init_qvel.shape) * 0.1
            else:
                qp = self.init_qpos.copy()
                qv = self.init_qvel.copy()

            qacc = self.init_qacc.copy()
            ctrl = self.init_ctrl.copy()

        else:
            pass
            """
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.data, datum_name)
                datum_dim = datum.shape[0]
                datum = init_state[start: start + datum_dim]
                setattr(self.model.data, datum_name, datum)
                start += datum_dim
            """
        self.set_state(qp, qv)

    def reset(self, init_state=None):

        # self.reset_mujoco(init_state)
        self.sim.reset()
        self.sim.forward()

        self.current_com = self.data.subtree_com[0]
        self.dcom = np.zeros_like(self.current_com)

        return self.get_current_obs()

    def set_state(self, qpos, qvel, qacc):
        assert qpos.shape == (self.qpos_dim, ) and qvel.shape == (
            self.qvel_dim, ) and qacc.shape == (self.qacc_dim, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.model.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.data.qpos,
            self.data.qvel,
            self.data.qacc,
            self.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        ctrl = self.inject_action_noise(action)
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]

        for _ in range(self.frame_skip):
            self.sim.step()

        new_com = self.data.subtree_com[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self, config=None):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            # self.viewer.start()
            # self.viewer.set_model(self.model)
        if config is not None:
            pass
            # self.viewer.set_window_pose(config["xpos"], config["ypos"])
            # self.viewer.set_window_size(config["width"], config["height"])
            # self.viewer.set_window_title(config["title"])
        return self.viewer

    def render(self, close=False, mode='human', config=None):
        if mode == 'human':
            # viewer = self.get_viewer(config=config)
            try:
                self.viewer.render()

            except:
                self.get_viewer(config=config)
                self.viewer.render()
        elif mode == 'rgb_array':
            viewer = self.get_viewer(config=config)
            viewer.loop_once()
            # self.get_viewer(config=config).render()
            data, width, height = self.get_viewer(config=config).get_image()
            return np.fromstring(data,
                                 dtype='uint8').reshape(height, width,
                                                        3)[::-1, :, :]
        if close:
            self.stop_viewer()

    # def start_viewer(self):
    #     viewer = self.get_viewer()
    #     if not viewer.running:
    #         viewer.start()
    #
    # def stop_viewer(self):
    #     if self.viewer:
    #         self.viewer.finish()
    #         self.viewer = None

    # def release(self):
    #     # temporarily alleviate the issue (but still some leak)
    #     from learning_to_adapt.mujoco_py.mjlib import mjlib
    #     mjlib.mj_deleteModel(self.model._wrapped)
    #     mjlib.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.data.ximat[idx].reshape((3, 3))

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)

        ## _compute_subtree
        body_vels = np.zeros((self.model.nbody, 6))
        # bodywise quantities
        mass = self.model.body_mass.flatten()
        for i in range(self.model.nbody):
            # body velocity
            # Compute object 6D velocity in object-centered frame, world/local orientation.
            # mj_objectVelocity(const mjModel* m, const mjData* d, int objtype, int objid, mjtMum* res, int flg_local)
            mujoco_py.cymj._mj_objectVelocity(self.model, self.data, 1, i,
                                              body_vels[i], 0)
        lin_moms = body_vels[:, 3:] * mass.reshape((-1, 1))

        # init subtree mass
        body_parentid = self.model.body_parentid
        # subtree com and com_vel
        for i in range(self.model.nbody - 1, -1, -1):
            if i > 0:
                parent = body_parentid[i]
                # add scaled velocities
                lin_moms[parent] += lin_moms[i]
                # accumulate mass
                mass[parent] += mass[i]
        return_ = lin_moms / mass.reshape((-1, 1))
        return return_[idx]

        # return self.model.body_comvels[idx]

    # def get_body_comvel(self, body_name):
    #     idx = self.model.body_names.index(body_name)
    #
    #     return self.model.body_comvels[idx]

    # def print_stats(self):
    #     super(MujocoEnv, self).print_stats()
    #     print("qpos dim:\t%d" % len(self.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError

    # def set_state_tmp(self, state, restore=True):
    #     if restore:
    #         prev_pos = self.data.qpos
    #         prev_qvel = self.data.qvel
    #         prev_ctrl = self.data.ctrl
    #         prev_act = self.data.act
    #     qpos, qvel = self.decode_state(state)
    #     self.model.data.qpos = qpos
    #     self.model.data.qvel = qvel
    #     self.model.forward()
    #     yield
    #     if restore:
    #         self.data.qpos = prev_pos
    #         self.data.qvel = prev_qvel
    #         self.data.ctrl = prev_ctrl
    #         self.data.act = prev_act
    #         self.model.forward()

    def get_param_values(self):
        return {}

    def set_param_values(self, values):
        pass
Пример #26
0
class BaseMujocoHexRobot(HexRobot):
    """The hexapod robot in mujoco. Child classes need to define the joint names."""
    @classmethod
    @abc.abstractproperty
    def joint_names(cls) -> Tuple[str, ...]:
        """The names of the joints in the mujoco xml."""
        raise NotImplementedError

    def __init__(self, load_path: str, viewer: bool = True):
        """Initialize a hex robot in mujoco.

        Parameters
        ---------
        load_path : str
            Path to the xml file.
        viewer : bool
            Flag to create a MjViewer for the robot. By default a viewer will be created.
        """
        model = load_model_from_path(load_path)
        self._sim = MjSim(model)

        self.num_joints = len(self.joint_names)

        self._joint_qpos_ids = [
            self._sim.model.get_joint_qpos_addr(x) for x in self.joint_names
        ]
        self._joint_qvel_ids = [
            self._sim.model.get_joint_qvel_addr(x) for x in self.joint_names
        ]
        self._joint_actuator_id_map = dict(
            zip(self.joint_names, range(self.num_joints)))

        if viewer:
            self._viewer = MjViewer(self._sim)
        else:
            self._viewer = None

    def get_state(self) -> HexState:
        """Retrieve the current state of the robot.

        Returns
        -------
        HexState
            The current state of the robot.
        """
        sim_state = self._sim.get_state()
        joint_pos = sim_state.qpos[self._joint_qpos_ids]
        joint_vel = sim_state.qvel[self._joint_qvel_ids]
        return HexState(
            qpos=OrderedDict(zip(self.joint_names, joint_pos)),
            qvel=OrderedDict(zip(self.joint_names, joint_vel)),
            acceleration=np.zeros(3),
        )  # TODO get the acceleration.

    def set_joint_positions(self, joints: Dict[str, float]) -> None:
        """Sets the joint positions to the specified values

        Parameters
        ----------
        joints : Dict[str, float]
            A mapping between the joint name and its value to be set.
        """
        sim_state = self._sim.get_state()
        for name, value in joints.items():
            joint_id = self._sim.model.get_joint_qpos_addr(name)
            sim_state.qpos[joint_id] = value
        self._sim.set_state(sim_state)
        self._sim.forward()

    def step(self) -> None:
        """Steps the simulation forward by one step.

        Updates the visualizer if one is available.
        """
        self._sim.step()
        if self._viewer is not None:
            self._viewer.render()

    def set_command(self, command: Dict[str, float]) -> None:
        """Set the command of the robot.

        Parameters
        ----------
        command : Dict[str, float]
            The commands to the robot.
        """
        for name, value in command.items():
            self._sim.data.ctrl[self._joint_actuator_id_map[name]] = value
class DeterministicGraspPolicy(Policy):
    def __init__(self, agentparams, policyparams):
        Policy.__init__(self)
        self.agentparams = agentparams
        self.min_lift = agentparams.get('min_z_lift', 0.05)

        self.policyparams = policyparams
        self.adim = agentparams['adim']
        self.n_actions = policyparams['nactions']

        if 'num_samples' in self.policyparams:
            self.M = self.policyparams['num_samples']
        else:
            self.M = 20  # number of CEM Samples

        if 'best_to_take' in self.policyparams:
            self.K = self.policyparams['best_to_take']
        else:
            self.K = 5  # best samples to average for next sampling

        assert self.adim >= 4, 'must have at least x, y, z + gripper actions'

        self.moveto = True
        self.drop = False
        self.grasp = False
        self.lift = False

        if 'iterations' in self.policyparams:
            self.niter = self.policyparams['iterations']
        else:
            self.niter = 10  # number of iterations
        self.imgs = []
        self.iter = 0

    def setup_CEM_model(self, t, init_model):
        if t == 0:
            if 'gen_xml' in self.agentparams:

                self.CEM_model = MjSim(
                    load_model_from_path(self.agentparams['gen_xml_fname']))
            else:
                self.CEM_model = MjSim(
                    load_model_from_path(self.agentparams['filename']))

        self.initial_qpos = init_model.data.qpos.copy()
        self.initial_qvel = init_model.data.qvel.copy()

        self.reset_CEM_model()

    def reset_CEM_model(self):
        if len(self.imgs) > 0:
            print('saving iter', self.iter, 'with frames:', len(self.imgs))
            npy_to_gif(
                self.imgs,
                os.path.join(self.agentparams['record'],
                             'iter_{}'.format(self.iter)))
            self.iter += 1

        sim_state = self.CEM_model.get_state()
        sim_state.qpos[:] = self.initial_qpos.copy()
        sim_state.qvel[:] = self.initial_qvel.copy()
        self.CEM_model.set_state(sim_state)

        self.prev_target = self.CEM_model.data.qpos[:self.adim].squeeze().copy(
        )
        self.target = self.CEM_model.data.qpos[:self.adim].squeeze().copy()

        for _ in range(5):
            self.step_model(self.target)

        self.imgs = []

    def viewer_refresh(self):
        if 'debug_viewer' in self.policyparams and self.policyparams[
                'debug_viewer']:
            large_img = self.CEM_model.render(
                640, 480, camera_name="maincam")[::-1, :, :]
            self.imgs.append(large_img)

    def perform_CEM(self, targetxy):
        self.reset_CEM_model()

        if 'object_meshes' in self.agentparams:
            targetxy = self.CEM_model.data.sensordata[:2].squeeze().copy()
        print('Beginning CEM')
        ang_dis_mean = self.policyparams['init_mean'].copy()
        ang_dis_cov = self.policyparams['init_cov'].copy()
        scores = np.zeros(self.M)
        best_score, best_ang, best_xy = -1, None, None

        for n in range(self.niter):
            ang_disp_samps = np.random.multivariate_normal(ang_dis_mean,
                                                           ang_dis_cov,
                                                           size=self.M)

            for s in range(self.M):
                #print('On iter', n, 'sample', s)
                self.reset_CEM_model()
                move = True
                drop = False
                grasp = False
                g_start = 0
                lift = False

                angle_delta = ang_disp_samps[s, 0]
                targetxy_delta = targetxy + ang_disp_samps[s, 1:]
                print('init iter')
                print(targetxy)
                print(angle_delta, targetxy_delta)
                for t in range(self.n_actions):
                    angle_action = np.zeros(self.adim)
                    cur_xy = self.CEM_model.data.qpos[:2].squeeze()

                    if move and np.linalg.norm(
                            targetxy_delta - cur_xy,
                            2) <= self.policyparams['drop_thresh']:
                        move = False
                        drop = True

                    if drop and self.CEM_model.data.qpos[2] <= -0.079:
                        drop = False
                        grasp = True
                        g_start = t
                    if grasp and t - g_start > 2:
                        grasp = False
                        lift = True
                    if move:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = self.agentparams['ztarget']
                        angle_action[3] = angle_delta
                        angle_action[4] = -100
                    elif drop:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = -0.08
                        angle_action[3] = angle_delta
                        angle_action[4] = -100
                    elif grasp:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = -0.08
                        angle_action[3] = angle_delta
                        angle_action[4] = 21
                    elif lift:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = self.agentparams['ztarget']
                        angle_action[3] = angle_delta
                        angle_action[4] = 21

                    self.step_model(angle_action)
                # print 'final z', self.CEM_model.data.qpos[8].squeeze(), 'with angle', angle_samps[s]

                if 'object_meshes' in self.agentparams:
                    obj_z = self.CEM_model.data.sensordata[2].squeeze()
                else:
                    obj_z = self.CEM_model.data.qpos[8].squeeze()

                print('obj_z at iter', n * self.M + s, 'is', obj_z)

                scores[s] = obj_z

                if 'stop_iter_thresh' in self.policyparams and scores[
                        s] > self.policyparams['stop_iter_thresh']:
                    return ang_disp_samps[s, 0], ang_disp_samps[s, 1:]
                # print 'score',scores[s]

            best_scores = np.argsort(-scores)[:self.K]

            if scores[best_scores[0]] > best_score or best_ang is None:

                #print('best', scores[best_scores[0]])

                best_score = scores[best_scores[0]]
                best_ang = ang_disp_samps[best_scores[0], 0]
                best_xy = ang_disp_samps[best_scores[0], 1:]

            ang_dis_mean = np.mean(ang_disp_samps[best_scores, :], axis=0)
            ang_dis_cov = np.cov(ang_disp_samps[best_scores, :].T)

        return best_ang, best_xy

    def step_model(self, input_actions):
        pos_clip = self.agentparams['targetpos_clip']

        self.prev_target = self.target.copy()
        self.target = input_actions.copy()
        self.target = np.clip(self.target, pos_clip[0], pos_clip[1])

        for s in range(self.agentparams['substeps']):
            step_action = s / float(self.agentparams['substeps']) * (
                self.target - self.prev_target) + self.prev_target
            self.CEM_model.data.ctrl[:] = step_action
            self.CEM_model.step()

        self.viewer_refresh()

        #print "end", self.CEM_model.data.qpos[:4].squeeze()

    def act(self,
            traj,
            t,
            init_model=None,
            goal_object_pose=None,
            hyperparams=None,
            goal_image=None):
        """
        Scripted pick->place->wiggle trajectory. There's probably a better way to script this
        but booleans will suffice for now.
        """
        self.setup_CEM_model(t, init_model)

        if t == 0:
            self.moveto = True
            self.drop = False
            self.lift = False
            self.grasp = False
            self.second_moveto = False
            self.final_drop = False
            self.wiggle = False
            self.switchTime = 0
            print('start pose', traj.Object_pose[t, 0, :3])
            self.targetxy = traj.Object_pose[t, 0, :2]
            self.angle, self.disp = self.perform_CEM(self.targetxy)
            print('best angle', self.angle, 'best target', self.targetxy)
            self.targetxy += self.disp
            traj.desig_pos = np.zeros((2, 2))
            traj.desig_pos[0] = self.targetxy.copy()

        if self.lift and traj.X_full[t, 2] >= self.min_lift:
            self.lift = False

            if traj.Object_full_pose[t, 0, 2] > -0.01:  #lift occursed
                self.second_moveto = True
                self.targetxy = np.random.uniform(-0.2, 0.2, size=2)
                print("LIFTED OBJECT!")
                print('dropping at', self.targetxy)
                traj.desig_pos[1] = self.targetxy.copy()
            else:
                self.wiggle = True

        if self.grasp and self.switchTime > 0:
            print('lifting at time', t, '!', 'have z', traj.X_full[t, 2])

            self.grasp = False
            self.lift = True

        if self.drop and (traj.X_full[t, 2] <= -0.079 or self.switchTime >= 2):
            print('grasping at time', t, '!', 'have z', traj.X_full[t, 2])
            self.drop = False
            self.grasp = True
            self.switchTime = 0

        if self.moveto and (np.linalg.norm(traj.X_full[t, :2] - self.targetxy,
                                           2) <=
                            self.policyparams['drop_thresh']):
            if self.switchTime > 0:
                print('stopping moveto at time', t, '!')
                print(traj.X_full[t, :2], self.targetxy)
                self.moveto = False
                self.drop = True
                self.switchTime = 0
            else:
                self.switchTime += 1

        if self.second_moveto and np.linalg.norm(
                traj.X_full[t, :2] - self.targetxy,
                2) <= self.policyparams['drop_thresh']:
            self.second_moveto = False
            self.final_drop = True
            self.switchTime = 0

        actions = np.zeros(self.adim)
        if self.moveto or self.second_moveto:
            delta = np.zeros(3)
            delta[:2] = self.targetxy - traj.target_qpos[t, :2]

            if 'xyz_std' in self.policyparams:
                delta += self.policyparams['xyz_std'] * np.random.normal(
                    size=3)

            norm = np.sqrt(np.sum(np.square(delta)))
            if norm > self.policyparams['max_norm']:
                delta *= self.policyparams['max_norm'] / norm

            actions[:3] = traj.target_qpos[t, :3] + delta
            actions[2] = self.agentparams['ztarget']
            actions[3] = self.angle

            if self.moveto:
                actions[-1] = -1
            else:
                actions[-1] = 1

            self.switchTime += 1

        elif self.drop:
            actions[:2] = self.targetxy
            actions[2] = -0.08
            actions[3] = self.angle
            actions[-1] = -1
            self.switchTime += 1

        elif self.lift:
            actions[:2] = self.targetxy
            actions[2] = self.agentparams['ztarget']
            actions[3] = self.angle
            actions[-1] = 1

        elif self.grasp:
            actions[:2] = self.targetxy
            actions[2] = -0.08
            actions[3] = self.angle
            actions[-1] = 1
            self.switchTime += 1
        elif self.final_drop:

            if self.switchTime > 0:
                print('opening')
                actions[:2] = self.targetxy
                actions[2] = -0.08
                actions[3] = self.angle
                actions[-1] = -1
                self.switchTime += 1

            if self.switchTime > 1:
                print('up')
                actions[:2] = self.targetxy
                actions[2] = self.agentparams['ztarget'] + np.random.uniform(
                    -0.03, 0.05)
                actions[3] = self.angle + np.random.uniform(
                    -np.pi / 4., np.pi / 4.)
                actions[-1] = -1
                self.final_drop = False
                self.wiggle = True
                self.switchTime = 0
            else:
                actions[:2] = self.targetxy
                actions[2] = -0.08
                actions[3] = self.angle
                actions[-1] = -1
                self.switchTime += 1

        elif self.wiggle:
            delta_vec = np.random.normal(size=2)
            norm = np.sqrt(np.sum(np.square(delta_vec)))
            if norm > self.policyparams['max_norm']:
                delta_vec *= self.policyparams['max_norm'] / norm
            actions[:2] = np.clip(traj.target_qpos[t, :2] + delta_vec, -0.15,
                                  0.15)
            delta_z = np.random.uniform(-0.08 - traj.target_qpos[t, 2],
                                        0.3 - traj.target_qpos[t, 2])
            actions[2] = traj.target_qpos[t, 2] + delta_z
            actions[3] = traj.target_qpos[t, 3] + np.random.uniform(-0.1, 0.1)
            if np.random.uniform() > 0.5:
                actions[4] = -1
            else:
                actions[4] = 1

        if 'angle_std' in self.policyparams:
            actions[3] += self.policyparams['angle_std'] * np.random.normal()

        return actions - traj.target_qpos[t, :] * traj.mask_rel
Пример #28
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip=1, xml_string=""):
        """
        @param model_path path of the default model
        @param xml_string if given, the model will be reset using these values
        """

        fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.model = load_model_from_path(fullpath)
        with open(fullpath, 'r') as f:
            self.model_xml = f.read()
            self.default_xml = self.model_xml

        if xml_string != "":
            self.model = load_model_from_xml(xml_string)
            self.model_xml = xml_string

        self.frame_skip = frame_skip

        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([
            o.size for o in observation
        ]) if type(observation) is tuple else observation.size

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()
        self.set_param_space()

    def get_params(self):
        """
        Returns a dict of (param_name, param_value)
        """
        return MujocoUpdater(self.model_xml).get_params()

    def set_params(self, params):
        """
        @param params: dict(param_name, param_value)
        param_name should be a string of bodyname__type__paramname
        where type is either geom or joint,
        e.g. thigh__joint__friction,
        and param_value is a numpy array
        """
        # invalidate cached properties
        self.__dict__.pop('action_space', None)
        self.__dict__.pop('observation_space', None)

        new_xml = MujocoUpdater.set_params(self.model_xml, params)
        self.__init__(xml_string=new_xml)
        self.reset()
        return self

    def set_param_space(self, param_space=None, eps_scale=0.5, replace=True):
        """
        Set param_space
        @param param_space: dict(string, gym.space.base.Space)
        @param eps_scale: scale of variation applied to all params
        @param replace: if true, param_space overwrites default param_space.
                        Default behavior is to merge.
        """
        if param_space is not None:
            if replace:
                self._param_space = param_space
            else:
                self._param_space = {**self._param_space, **param_space}
        else:
            params = MujocoUpdater(self.model_xml).get_params()
            self._param_space = dict()
            for param, value in params.items():
                eps = np.abs(value) * eps_scale
                ub = value + eps
                lb = value - eps
                for name in positive_params:
                    if name in param:
                        lb = np.clip(lb, 1e-3, ub)
                        break
                space = spaces.Box(lb, ub)
                self._param_space[param] = space

    def get_geom_params(self, body_name):
        geom = MujocoUpdater(self.model_xml).get_geom(body_name)
        return {
            k: v
            for k, v in geom.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_joint_params(self, body_name):
        joint = MujocoUpdater(self.model_xml).get_joint(body_name)
        return {
            k: v
            for k, v in joint.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_body_names(self):
        return MujocoUpdater(self.model_xml).get_body_names()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.sim.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.sim.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.sim.data.qpos,
            self.sim.data.qvel,
            self.sim.data.qacc,
            self.sim.data.ctrl,
        ]).ravel()

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def reset(self, randomize=True):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    # Added for bayesian_rl
    def get_sim_state(self):
        return self.sim.get_state()

    # Added for bayesian_rl
    def set_sim_state(self, state):
        self.sim.set_state(state)

    # Added for bayesian_rl
    def set_state_vector(self, state_vector):
        qpos = state_vector[:self.model.nq]
        qvel = state_vector[self.model.nq:]
        self.set_state(qpos, qvel)

    # Added for bayesian_rl
    def get_state_vector(self):
        return self.state_vector()

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}

    # def step(self, a):
    #     return self._step(a)

    # Added for bayesian_rl
    def take_action(self, a):
        self.step(a)
        return self.get_sim_state()

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['mean']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=640,
                                             height=480,
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + '_' + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))

    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        if mode == 'rgb_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()
Пример #29
0
model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
viewer = MjViewer(sim)

states = [{'box:x': +0.8, 'box:y': +0.8},
          {'box:x': -0.8, 'box:y': +0.8},
          {'box:x': -0.8, 'box:y': -0.8},
          {'box:x': +0.8, 'box:y': -0.8},
          {'box:x': +0.0, 'box:y': +0.0}]

# MjModel.joint_name2id returns the index of a joint in
# MjData.qpos.
x_joint_i = sim.model.get_joint_qpos_addr("box:x")
y_joint_i = sim.model.get_joint_qpos_addr("box:y")

print_box_xpos(sim)

while True:
    for state in states:
        sim_state = sim.get_state()
        sim_state.qpos[x_joint_i] = state["box:x"]
        sim_state.qpos[y_joint_i] = state["box:y"]
        sim.set_state(sim_state)
        sim.forward()
        print("updated state to", state)
        print_box_xpos(sim)
        viewer.render()

    if os.getenv('TESTING') is not None:
        break
Пример #30
0
class GripperTester:
    """
    A class that is used to test gripper

    Args:
        gripper (GripperModel): A gripper instance to be tested
        pos (str): (x y z) position to place the gripper in string form, e.g. '0 0 0.3'
        quat (str): rotation to apply to gripper in string form, e.g. '0 0 1 0' to flip z axis
        gripper_low_pos (float): controls the gipper y position, larger -> higher
        gripper_high_pos (float): controls the gipper y high position larger -> higher,
            must be larger than gripper_low_pos
        box_size (None or 3-tuple of int): the size of the box to grasp, None defaults to [0.02, 0.02, 0.02]
        box_density (int): the density of the box to grasp
        step_time (int): the interval between two gripper actions
        render (bool): if True, show rendering
    """

    def __init__(
        self,
        gripper,
        pos,
        quat,
        gripper_low_pos,
        gripper_high_pos,
        box_size=None,
        box_density=10000,
        step_time=400,
        render=True,
    ):
        # define viewer
        self.viewer = None

        world = MujocoWorldBase()
        # Add a table
        arena = TableArena(table_full_size=(0.4, 0.4, 0.1), table_offset=(0, 0, 0.1), has_legs=False)
        world.merge(arena)

        # Add a gripper
        self.gripper = gripper
        # Create another body with a slider joint to which we'll add this gripper
        gripper_body = ET.Element("body")
        gripper_body.set("pos", pos)
        gripper_body.set("quat", quat)  # flip z
        gripper_body.append(new_joint(name="gripper_z_joint", type="slide", axis="0 0 -1", damping="50"))
        # Add all gripper bodies to this higher level body
        for body in gripper.worldbody:
            gripper_body.append(body)
        # Merge the all of the gripper tags except its bodies
        world.merge(gripper, merge_body=None)
        # Manually add the higher level body we created
        world.worldbody.append(gripper_body)
        # Create a new actuator to control our slider joint
        world.actuator.append(new_actuator(joint="gripper_z_joint", act_type="position", name="gripper_z", kp="500"))

        # Add an object for grasping
        # density is in units kg / m3
        TABLE_TOP = [0, 0, 0.09]
        if box_size is None:
            box_size = [0.02, 0.02, 0.02]
        box_size = np.array(box_size)
        self.cube = BoxObject(
            name="object", size=box_size, rgba=[1, 0, 0, 1], friction=[1, 0.005, 0.0001], density=box_density
        )
        object_pos = np.array(TABLE_TOP + box_size * [0, 0, 1])
        mujoco_object = self.cube.get_obj()
        # Set the position of this object
        mujoco_object.set("pos", array_to_string(object_pos))
        # Add our object to the world body
        world.worldbody.append(mujoco_object)

        # add reference objects for x and y axes
        x_ref = BoxObject(
            name="x_ref", size=[0.01, 0.01, 0.01], rgba=[0, 1, 0, 1], obj_type="visual", joints=None
        ).get_obj()
        x_ref.set("pos", "0.2 0 0.105")
        world.worldbody.append(x_ref)
        y_ref = BoxObject(
            name="y_ref", size=[0.01, 0.01, 0.01], rgba=[0, 0, 1, 1], obj_type="visual", joints=None
        ).get_obj()
        y_ref.set("pos", "0 0.2 0.105")
        world.worldbody.append(y_ref)

        self.world = world
        self.render = render
        self.simulation_ready = False
        self.step_time = step_time
        self.cur_step = 0
        if gripper_low_pos > gripper_high_pos:
            raise ValueError(
                "gripper_low_pos {} is larger " "than gripper_high_pos {}".format(gripper_low_pos, gripper_high_pos)
            )
        self.gripper_low_pos = gripper_low_pos
        self.gripper_high_pos = gripper_high_pos

    def start_simulation(self):
        """
        Starts simulation of the test world
        """
        model = self.world.get_model(mode="mujoco_py")

        self.sim = MjSim(model)
        if self.render:
            self.viewer = MjViewer(self.sim)
        self.sim_state = self.sim.get_state()

        # For gravity correction
        gravity_corrected = ["gripper_z_joint"]
        self._gravity_corrected_qvels = [self.sim.model.get_joint_qvel_addr(x) for x in gravity_corrected]

        self.gripper_z_id = self.sim.model.actuator_name2id("gripper_z")
        self.gripper_z_is_low = False

        self.gripper_actuator_ids = [self.sim.model.actuator_name2id(x) for x in self.gripper.actuators]

        self.gripper_is_closed = True

        self.object_id = self.sim.model.body_name2id(self.cube.root_body)
        object_default_pos = self.sim.data.body_xpos[self.object_id]
        self.object_default_pos = np.array(object_default_pos, copy=True)

        self.reset()
        self.simulation_ready = True

    def reset(self):
        """
        Resets the simulation to the initial state
        """
        self.sim.set_state(self.sim_state)
        self.cur_step = 0

    def close(self):
        """
        Close the viewer if it exists
        """
        if self.viewer is not None:
            self.viewer.close()

    def step(self):
        """
        Forward the simulation by one timestep

        Raises:
            RuntimeError: if start_simulation is not yet called.
        """
        if not self.simulation_ready:
            raise RuntimeError("Call start_simulation before calling step")
        if self.gripper_z_is_low:
            self.sim.data.ctrl[self.gripper_z_id] = self.gripper_low_pos
        else:
            self.sim.data.ctrl[self.gripper_z_id] = self.gripper_high_pos
        if self.gripper_is_closed:
            self._apply_gripper_action(1)
        else:
            self._apply_gripper_action(-1)
        self._apply_gravity_compensation()
        self.sim.step()
        if self.render:
            self.viewer.render()
        self.cur_step += 1

    def _apply_gripper_action(self, action):
        """
        Applies binary gripper action

        Args:
            action (int): Action to apply. Should be -1 (open) or 1 (closed)
        """
        gripper_action_actual = self.gripper.format_action(np.array([action]))
        # rescale normalized gripper action to control ranges
        ctrl_range = self.sim.model.actuator_ctrlrange[self.gripper_actuator_ids]
        bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0])
        weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0])
        applied_gripper_action = bias + weight * gripper_action_actual
        self.sim.data.ctrl[self.gripper_actuator_ids] = applied_gripper_action

    def _apply_gravity_compensation(self):
        """
        Applies gravity compensation to the simulation
        """
        self.sim.data.qfrc_applied[self._gravity_corrected_qvels] = self.sim.data.qfrc_bias[
            self._gravity_corrected_qvels
        ]

    def loop(self, total_iters=1, test_y=False, y_baseline=0.01):
        """
        Performs lower, grip, raise and release actions of a gripper,
                each separated with T timesteps

        Args:
            total_iters (int): Iterations to perform before exiting
            test_y (bool): test if object is lifted
            y_baseline (float): threshold for determining that object is lifted
        """
        seq = [(False, False), (True, False), (True, True), (False, True)]
        for cur_iter in range(total_iters):
            for cur_plan in seq:
                self.gripper_z_is_low, self.gripper_is_closed = cur_plan
                for step in range(self.step_time):
                    self.step()
            if test_y:
                if not self.object_height > y_baseline:
                    raise ValueError(
                        "object is lifed by {}, ".format(self.object_height)
                        + "not reaching the requirement {}".format(y_baseline)
                    )

    @property
    def object_height(self):
        """
        Queries the height (z) of the object compared to on the ground

        Returns:
            float: Object height relative to default (ground) object position
        """
        return self.sim.data.body_xpos[self.object_id][2] - self.object_default_pos[2]
class my_env(parameterized.TestCase):
    def __init__(self):
        # super(lab_env, self).__init__(env)
        # 导入xml文档
        self.model = load_model_from_path("assets/simpleEE_4box.xml")
        # 调用MjSim构建一个basic simulation
        self.sim = MjSim(model=self.model)
        self.sim = MjSim(self.model)

        self.viewer = MjViewer(self.sim)
        self.viewer._run_speed = 0.001
        self.timestep = 0
        # Sawyer Peg
        #self.init_qpos = np.array([-0.305, -0.83, 0.06086, 1.70464, -0.02976, 0.62496, -0.04712])
        # Flexiv Peg
        self.init_qpos = np.array([-0.22, -0.43, 0.449, -2, -0.25, 0.799, 0.99])

        for i in range(len(self.sim.data.qpos)):
            self.sim.data.qpos[i] = self.init_qpos[i]
        self.testQposFromSitePose(
            (np.array([0.57, 0.075, 0.08]), np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])),
            _INPLACE, True)

        print(self.sim.data.ctrl)
        print(self.sim.data.qpos)

    def get_state(self):
        self.sim.get_state()
        # 如果定义了相机
        # self.sim.data.get_camera_xpos('[camera name]')

    def reset(self):
        self.sim.reset()
        self.timestep = 0

    def step(self):
        # self.testQposFromSitePose((np.array([0.605, 0.075, 0.03]), np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])), _INPLACE)
        x=random.uniform(0.415, 0.635)
        y=random.uniform(-0.105, 0.105)

        self.testQposFromSitePose(
            (np.array([x, y, 0.045]), np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])),
            _INPLACE)
        # self.testQposFromSitePose(
        #     (None, np.array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17])),
        #     _INPLACE, True)
        self.sim.step()
        # self.sim.data.ctrl[0] += 0.01
        # print(self.sim.data.ctrl)
        # pdb.set_trace()
        # print(self.sim.data.qpos)
        print("sensordata", self.sim.data.sensordata)
        # self.viewer.add_overlay(const.GRID_TOPRIGHT, " ", SESSION_NAME)
        self.viewer.render()
        self.timestep += 1

    def create_viewer(self, run_speed=0.0005):
        self.viewer = MjViewer(self.sim)
        self.viewer._run_speed = run_speed
        # self.viewer._hide_overlay = HIDE_OVERLAY
        # self.viewer.vopt.frame = DISPLAY_FRAME
        # self.viewer.cam.azimuth = CAM_AZIMUTH
        # self.viewer.cam.distance = CAM_DISTANCE
        # self.viewer.cam.elevation = CAM_ELEVATION

    def testQposFromSitePose(self, target, inplace, qpos_flag=False):

        physics = mujoco.Physics.from_xml_string(FlexivPeg_XML)
        target_pos, target_quat = target
        count = 0
        physics2 = physics.copy(share_model=True)
        resetter = _ResetArm(seed=0)
        while True:
          result = ik.qpos_from_site_pose(
              physics=physics2,
              site_name=_SITE_NAME,
              target_pos=target_pos,
              target_quat=target_quat,
              joint_names=_JOINTS,
              tol=_TOL,
              max_steps=_MAX_STEPS,
              inplace=inplace,
          )

          if result.success:
            break
          elif count < _MAX_RESETS:
            resetter(physics2)
            count += 1
          else:
            raise RuntimeError(
                'Failed to find a solution within %i attempts.' % _MAX_RESETS)

        self.assertLessEqual(result.steps, _MAX_STEPS)
        self.assertLessEqual(result.err_norm, _TOL)
        # pdb.set_trace()
        physics.data.qpos[:] = result.qpos
        for i in range(len(self.sim.data.qpos)):
            if qpos_flag:
                self.sim.data.qpos[i]=physics.data.qpos[i]
            else:
                self.sim.data.ctrl[i] = physics.data.qpos[i]
        # print(physics.data.qpos)
        mjlib.mj_fwdPosition(physics.model.ptr, physics.data.ptr)
        if target_pos is not None:
          pos = physics.named.data.site_xpos[_SITE_NAME]
          np.testing.assert_array_almost_equal(pos, target_pos)
        if target_quat is not None:
          xmat = physics.named.data.site_xmat[_SITE_NAME]
          quat = np.empty_like(target_quat)
          mjlib.mju_mat2Quat(quat, xmat)
          quat /= quat.ptp()  # Normalize xquat so that its max-min range is 1
Пример #32
0
def print_state(state):
    time, qpos, qvel, act, udd_state = state.time, state.qpos, state.qvel, state.act, state.udd_state
    print("t: %5.3f" % time)
    print("qpos: ", qpos)
    print("qvel: ", qvel)
    print("tosser (slide,hinge): ", qpos[:2])
    print("object (z,y,pitch): ", qpos[-3:])


model = load_model_from_path("../../xmls/tosser.xml")
sim = MjSim(model)

viewer = MjViewer(sim)

sim_state = sim.get_state()

while True:
    sim.set_state(sim_state)

    for i in range(1000):
        state = sim.get_state()
        # time, qpos, qvel, act, udd_state = state.time, state.qpos, state.qvel, state.act, state.udd_state
        # print(time, qpos, qvel)
        print_state(state)
        if i < 150:
            sim.data.ctrl[0] = -0.0
            sim.data.ctrl[1] = -0.0
        else:
            sim.data.ctrl[0] = -1.0
            sim.data.ctrl[1] = -1.0
Пример #33
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=640, height=480, mode='offscreen',
                                             camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))