Example #1
def find_contact_height(sim: mj.MjSim, iterations: int = 10) -> float:
    """ Find the height we should put our walker at so we are just barely in contact with the ground.

        Runs a binary search on height (assumed to be state[1][2]) such that the body with the minimum z coordinate
        on the ground is just barely in contact with the ground.


        Args:
            sim: the instantiated sim MjSim object you want to find the correct height for
            iterations: number of times to run_util the binary search
        Returns:
            just the height as a float, you'll need to set it yourself

    """

    state = sim.get_state()
    height_guess = state[1][1]
    step = height_guess * 2  # initial step size; bounds how far the search can move the height

    for _ in range(iterations):
        if sim.data.ncon:  # ncon is the number of active contacts
            height_guess += step
        else:
            height_guess -= step

        state[1][1] = height_guess
        sim.set_state(state)
        sim.forward()
        step /= 2

    return height_guess
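A minimal usage sketch for the function above; the model file name and the assumption that the root height lives at qpos[1] are illustrative, not taken from the original project.

from mujoco_py import load_model_from_path, MjSim

# Hypothetical walker model whose root height is stored at qpos[1].
sim = MjSim(load_model_from_path("walker.xml"))
height = find_contact_height(sim, iterations=20)

# The function only returns the height; apply it to the state ourselves.
state = sim.get_state()
state[1][1] = height
sim.set_state(state)
sim.forward()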
Example #2
    def play(self, mocap_filepath):
        from mujoco_py import load_model_from_xml, MjSim, MjViewer

        curr_path = getcwd()
        xmlpath = '/mujoco/humanoid_deepmimic/envs/asset/dp_env_v2.xml'
        with open(curr_path + xmlpath) as fin:
            MODEL_XML = fin.read()

        model = load_model_from_xml(MODEL_XML)
        sim = MjSim(model)
        viewer = MjViewer(sim)

        self.read_raw_data(mocap_filepath)
        self.convert_raw_data()

        from time import sleep

        phase_offset = np.array([0.0, 0.0, 0.0])

        while True:
            for k in range(len(self.data)):
                tmp_val = self.data_config[k]
                sim_state = sim.get_state()
                sim_state.qpos[:] = tmp_val[:]
                sim_state.qpos[:3] += phase_offset[:]
                sim.set_state(sim_state)
                sim.forward()
                viewer.render()

            sim_state = sim.get_state()
            phase_offset = sim_state.qpos[:3]
            phase_offset[2] = 0
Example #3
def test_tableBoundViolation():
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    test1 = np.array([-0.0031, -0.9718, -0.7269, -2.4357, -0.0157, 1.5763, 0.7303]) #False
    test2 = np.array([0.0264, -0.0772, 0.1924, -2.8478, -0.0273, 2.8339, 0.7566]) #True
    test3 = np.array([-1.4870, -1.7289, 1.6138, -1.9814, -0.9856, 1.9304, 0.9981]) #True
    test4 = np.array([-0.5250, -0.6410, 0.1893, -1.3827, -0.2573, 2.1356, 0.7116]) #False
    test5 = np.array([-0.0133, 0.9846, 0.0365, -1.5491, 2.8629, 0.7630, 0.6254]) #True

    qd = np.zeros(7)

    state = MjSimState(time=0,qpos=test1,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test1:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test2,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test2:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test3,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test3:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test4,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test4:", "Passed" if not bounds.tableBoundViolation(sim) else "Failed")
    state = MjSimState(time=0,qpos=test5,qvel=qd,act=None,udd_state={})
    sim.set_state(state)
    sim.step()
    print("Test5:", "Passed" if bounds.tableBoundViolation(sim) else "Failed")
Example #4
def run_activations_fcn(est_activations,
                        model_ver=0,
                        timestep=0.005,
                        Mj_render=False):
    # FIXME: q0 is now the chassis position; this still needs to be fixed
    """
    This function runs the predicted activations generated by running
    the inverse map on the desired task kinematics.
    """

    #loading data
    #print("loading data")
    #task_kinematics=np.load("task_kinematics.npy")
    #est_task_activations=np.load("est_task_activations.npy")

    model = load_model_from_path(
        "./models/nmi_leg_w_chassis_v{}.xml".format(model_ver))
    sim = MjSim(model)
    if Mj_render:
        viewer = MjViewer(sim)
        # viewer.cam.fixedcamid += 1
        # viewer.cam.type = const.CAMERA_FIXED
    sim_state = sim.get_state()
    control_vector_length = len(sim.data.ctrl)
    print("control_vector_length: " + str(control_vector_length))

    number_of_task_samples = est_activations.shape[0]

    real_attempt_positions = np.zeros((number_of_task_samples, 2))
    real_attempt_activations = np.zeros((number_of_task_samples, 3))
    chassis_pos = np.zeros(number_of_task_samples, )
    sim.set_state(sim_state)
    for ii in range(number_of_task_samples):
        sim.data.ctrl[:] = est_activations[ii, :]
        sim.step()
        current_positions_array = sim.data.qpos[-2:]

        # current_kinematics_array=np.array(
        # 	[sim.data.qpos[0],
        # 	sim.data.qvel[0],
        # 	sim.data.qacc[0],
        # 	sim.data.qpos[1],
        # 	sim.data.qvel[1],
        # 	sim.data.qacc[1]]
        # 	)
        chassis_pos[ii] = sim.data.get_geom_xpos("Chassis_frame")[0]
        real_attempt_positions[ii, :] = current_positions_array
        real_attempt_activations[ii, :] = sim.data.ctrl
        if Mj_render:
            viewer.render()
    real_attempt_kinematics = positions_to_kinematics_fcn(
        real_attempt_positions[:, 0],
        real_attempt_positions[:, 1],
        timestep=timestep)
    return real_attempt_kinematics, real_attempt_activations, chassis_pos
Example #5
def run_activations_fcn(MuJoCo_model_name, est_activations, timestep=0.01, Mj_render=False):
    # FIXME: q0 is now the chassis position; this still needs to be fixed
    """
    This function runs the predicted activations generated by running
    the inverse map on the desired task kinematics.
    """

    #loading data
    #print("loading data")
    #task_kinematics=np.load("task_kinematics.npy")
    #est_task_activations=np.load("est_task_activations.npy")

    MuJoCo_model = load_model_from_path("./models/" + MuJoCo_model_name)
    sim = MjSim(MuJoCo_model)
    if Mj_render:
        viewer = MjViewer(sim)
        # to move it to the mounted camera
        viewer.cam.fixedcamid += 1
        viewer.cam.type = const.CAMERA_FIXED
        # # to record the video
        # viewer._record_video = True
        # # recording path
        # viewer._video_path = "~/Documents/"+str(time.localtime()[3])+str(time.localtime()[4])+str(time.localtime()[5])
    sim_state = sim.get_state()
    control_vector_length = len(sim.data.ctrl)
    #print("control_vector_length: "+str(control_vector_length))

    number_of_task_samples = est_activations.shape[0]

    real_attempt_positions = np.zeros((number_of_task_samples, 2))
    real_attempt_activations = np.zeros((number_of_task_samples, 3))
    chassis_pos = np.zeros(number_of_task_samples,)
    sim.set_state(sim_state)
    for ii in range(number_of_task_samples):
        sim.data.ctrl[:] = est_activations[ii, :]
        sim.step()
        current_positions_array = sim.data.qpos[-2:]
        # current_kinematics_array=np.array(
        #     [sim.data.qpos[0],
        #     sim.data.qvel[0],
        #     sim.data.qacc[0],
        #     sim.data.qpos[1],
        #     sim.data.qvel[1],
        #     sim.data.qacc[1]]
        #     )
        chassis_pos[ii] = sim.data.get_geom_xpos("Chassis_frame")[0]
        real_attempt_positions[ii, :] = current_positions_array
        real_attempt_activations[ii, :] = sim.data.ctrl
        if Mj_render:
            viewer.render()
    real_attempt_kinematics = positions_to_kinematics_fcn(
        real_attempt_positions[:, 0], real_attempt_positions[:, 1], timestep=timestep)
    return real_attempt_kinematics, real_attempt_activations, chassis_pos
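A hedged usage sketch for the signature directly above; the model file name and the constant 30% activation pattern are placeholders for illustration only.

import numpy as np

# 1000 samples of a constant 30% activation on the three muscles (hypothetical input).
est_activations = 0.3 * np.ones((1000, 3))
kinematics, activations, chassis_pos = run_activations_fcn(
    "nmi_leg_w_chassis_v1.xml", est_activations, timestep=0.01, Mj_render=False)
print("final chassis x-position:", chassis_pos[-1])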
Example #6
def test_dampingControl():
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    timestep = generatePatternedTrajectories.TIMESTEP
    control_freq = 1/timestep
    total_time = 2
    num_cycles = int(total_time * control_freq)
    plt.ion()
    LW = 1.0
    fig = plt.figure(figsize=(4,15))
    axes = []
    lines = []
    goals = []
    for i in range(7):
        axes.append(fig.add_subplot(7,1,i+1))
        lines.append(axes[i].plot([],[],'b-', lw=LW)[0])
        goals.append(axes[i].plot([],[],'r-', lw=LW)[0])
        axes[i].set_ylim([-1, 1])
        axes[i].set_xlim([0,total_time])
        axes[i].set_ylabel("Angle{}(rad)".format(i), fontsize=8)
        axes[i].set_xlabel("Time(s)", fontsize=8)

    for test in range(5):
        q_init = bounds.getRandPosInBounds()
        qd_goal = np.zeros(7)
        qd_init = np.random.rand(7)
        for g in range(7):
            goals[g].set_ydata(np.ones(num_cycles) * qd_goal[g])
            goals[g].set_xdata(np.linspace(0, total_time, num_cycles))  # span the plotted time range
        sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={}))
        sim.step()
        sim_time = 0
        for i in range(num_cycles):
            state = sim.get_state()
            q = state[1]
            qd = state[2]
            sim.data.ctrl[:] = controllers.dampingControl(qd=qd)
            sim.step()
            viewer.render()
            if i % 35 == 0:
                for a in range(7):
                    lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time))
                    lines[a].set_ydata(np.append(lines[a].get_ydata(), qd[a]))
                fig.canvas.draw()
                fig.canvas.flush_events()
            sim_time += timestep
            if bounds.outOfBounds(q):
                break
        for i in range(7):
            lines[i].set_xdata([])
            lines[i].set_ydata([])
        time.sleep(1)
Example #7
class MyEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    def __init__(self):
        self.model = load_model_from_path("xmls/Tesrt.xml")
        self.sim = MjSim(self.model)
        self.viewer = None
        self.sim_state = self.sim.get_state()
        self.bodynames = [
            'torso1', 'head', 'uwaist', 'lwaist', 'butt',
            'right_thigh1', 'right_shin1', 'right_foot_cap1', 'right_foot_cap2', 'left_thigh1',
            'left_shin1', 'left_foot_cap1', 'left_foot_cap2', 'right_uarm1', 'right_larm',
            'right_hand', 'left_uarm1', 'left_larm', 'left_hand'
        ]

        ones_act = np.ones(len(self.sim.data.ctrl))
        ones_obs = np.ones(self._get_state().shape[0])

        self.action_space = spaces.Box(-ones_act, ones_act)
        self.observation_space = spaces.Box(-ones_obs, ones_obs)

    def _get_state(self):
        torso = []
        ret = []
        for i in range(len(self.bodynames)):
            vec = self.sim.data.get_geom_xpos(self.bodynames[i])
            if i == 0:
                ret = vec
                torso = vec
            else:
                ret = np.append(ret, vec - torso)
        return ret
    
    def _get_reward(self):
        return self.sim.data.get_geom_xpos('head')[2]

    def _step(self, action):
        for i in range(len(action)):
            self.sim.data.ctrl[i] = action[i] * 0.5
        
        self.sim.step()
        self.sim.step()
        return self._get_state(),self._get_reward(),False,{}

    def _reset(self):
        self.sim.set_state(self.sim_state)
        return self._get_state()

    def _render(self, mode = 'human', close = False):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()
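A short rollout sketch for the environment above, assuming the older gym API that dispatches to the underscored methods; calling them directly, as here, behaves the same way.

env = MyEnv()
obs = env._reset()
for _ in range(100):
    action = env.action_space.sample()           # random control in [-1, 1]
    obs, reward, done, info = env._step(action)
    env._render()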
Example #8
def test_cycle_through_orientations():
    panda_kinematics = ikpy_panda_kinematics.panda_kinematics()
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    x_target, y_target, z_target = 0.2, 0.0, 0.5
    step1 = np.linspace(0,np.pi/2, 100)
    step2 = np.linspace(np.pi/2, -np.pi/2, 200)
    step3 = np.linspace(-np.pi/2, 0, 100)
    sweep = np.concatenate((step1, step2, step3))
    roll = 0
    pitch = 0
    yaw = np.pi


    ee_goal = [x_target, y_target, z_target, roll, pitch, yaw]
    qinit = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6])

    sim.set_state(MjSimState(time=0,qpos=qinit,qvel=np.zeros(7),act=None,udd_state={}))
    sim.step()

    qgoal = qinit
    q = qinit
    qd = np.zeros(7)
    count = -1
    num_steps_per_change = 4
    for i in range(num_steps_per_change * 400):
        if i % num_steps_per_change == 0:
            count += 1
            ee_goal = [x_target, y_target, z_target, roll, pitch, sweep[count] + yaw]
            qgoal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q)
            R1 = panda_kinematics.euler_angles_to_rpy_rotation_matrix(rpy=[roll, pitch, sweep[count] + yaw])
            R2 = panda_kinematics.euler_angles_to_rpy_rotation_matrix(panda_kinematics.forward_kinematics(qgoal)[3:6])
            # R3 = sim.data.body_xmat[sim.model.body_name2id("right_hand")].reshape(3, 3)
            print("R1:\n", R1)
            print("R2:\n", R2)
            # print("R3:\n", R3)
            print("EE:", np.around(ee_goal, 3))
            print("q: ", np.around(qgoal, 3))

        state = sim.get_state()
        q = state[1]
        qd = state[2]
        sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=qgoal)
        sim.step()
        viewer.render()

    time.sleep(1)
Example #9
    def solve_disc_value_iteration(self, model: MjSim, cost_func):
        states = self._states  # discretised [positions; velocities] grid held by the class
        [row, col] = np.shape(states)
        for iter in range(6):
            print(f"iteration: {iter}")
            for s_1 in range(col):
                for s_2 in range(col):
                    for ctrl in range(self._ctrls.shape[0]):
                        # reinitialise the state
                        model.set_state(
                            State(time=0,
                                  qpos=np.array([states[0][s_1]]),
                                  qvel=np.array([states[1][s_2]]),
                                  act=0,
                                  udd_state={}))
                        model.data.ctrl[0] = self._ctrls[ctrl]
                        value_curr = cost_func(
                            np.append(model.data.xipos[1], model.data.qvel[0]),
                            model.data.ctrl)
                        model.step()
                        # solve for the instantaneous cost and interpolate the value at the next state

                        if (self._states[0][0] < model.data.qpos[0] < self._states[0][-1]) and \
                                self._states[1][0] < model.data.qvel[0] < self._states[1][-1]:
                            value_curr += interpolate.griddata(
                                np.array([self.P.ravel(),
                                          self.V.ravel()]).T,
                                self._values.ravel(),
                                np.array(
                                    [model.data.qpos[0],
                                     model.data.qvel[0]]).T)
                        else:
                            rbf_net = interpolate.Rbf(self.P.ravel(),
                                                      self.V.ravel(),
                                                      self._values.ravel(),
                                                      function="linear")
                            value_curr += rbf_net(model.data.qpos[0],
                                                  model.data.qvel[0])

                        # Hacky fix for the first iteration of vi
                        if ctrl == 0 and iter == 0:
                            self._values[s_1][s_2] = value_curr
                        if value_curr < self._values[s_1][s_2]:
                            # print(f"Difference = {value_curr - self._values[s_1][s_2]} index {s_1}{s_2}")
                            self._values[s_1][s_2] = value_curr
        return self._values
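For context, a hedged sketch of how the attributes referenced above (self._states, self._ctrls, self.P, self.V, self._values) might be laid out; the grid sizes and ranges are placeholders, not values from the original class.

import numpy as np

class ValueIterationGrids:
    """Hypothetical container for the attributes solve_disc_value_iteration expects."""
    def __init__(self):
        pos = np.linspace(-np.pi, np.pi, 21)       # discretised positions (placeholder range)
        vel = np.linspace(-8.0, 8.0, 21)           # discretised velocities (placeholder range)
        self._states = np.vstack([pos, vel])       # states[0] -> qpos grid, states[1] -> qvel grid
        self.P, self.V = np.meshgrid(pos, vel, indexing="ij")
        self._values = np.zeros_like(self.P)       # value table indexed as [s_1][s_2]
        self._ctrls = np.linspace(-2.0, 2.0, 11)   # discretised control inputs (placeholder range)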
Example #10
class Mujoco_Renderer():
    def __init__(self, mujoco_xml, hp):
        from mujoco_py import load_model_from_path, MjSim
        mujoco_xml = '/'.join(str.split(gcp.__file__, '/')[:-1]) + '/' + mujoco_xml
        self.sim = MjSim(load_model_from_path(mujoco_xml))
        self._hp = hp

    def render(self, qpos):
        sim_state = self.sim.get_state()
        sim_state.qpos[:2] = qpos
        sim_state.qvel[:] = np.zeros_like(self.sim.data.qvel)
        self.sim.set_state(sim_state)
        self.sim.forward()
        
        subgoal_image = self.sim.render(self._hp.mpar.img_sz, self._hp.mpar.img_sz, camera_name='maincam')
        # plt.imshow(subgoal_image)
        # plt.savefig('test.png')
        return subgoal_image
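A hypothetical call pattern for the renderer above; the XML name, the hp object carrying hp.mpar.img_sz, and the 'maincam' camera all come from the surrounding project and are assumed here.

import numpy as np

renderer = Mujoco_Renderer('pointmass.xml', hp)   # hp supplies mpar.img_sz
frame = renderer.render(np.array([0.1, -0.2]))    # H x W x 3 uint8 image of the subgoal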
Example #11
    def __init__(self, model: MujocoModel, fileName):

        self.FileName = fileName
        self.Model = model
        self.Config = TaskConfig(fileName)

        sim = MjSim(self.Model.MujocoModel)

        self.StartState = self.MakeState(sim, self.Config.StartConfig)
        self.EndState = self.MakeState(sim, self.Config.EndConfig)

        self.TargetPos = {}
        self.TargetAngle = {}

        joints = self.Model.JointList

        sim.set_state(self.EndState)
        sim.step()

        for joint in joints:
            self.TargetPos[joint.Site] = np.array(
                sim.data.get_site_xpos(joint.Site))
            self.TargetAngle[joint.Site] = np.array(
                self.MatToAngle(sim.data.get_site_xmat(joint.Site)))
Example #12
class MujocoSimulation(Simulation):
    def __init__(self, name, total_time=1000, recording_time=[0, 1000]):
        super(MujocoSimulation, self).__init__(name,
                                               total_time=total_time,
                                               recording_time=recording_time)
        self.model = load_model_from_path('mujoco_xmls/' + name + '.xml')
        self.sim = MjSim(self.model)
        self.initial_state = self.sim.get_state()
        self.input_size = len(self.sim.data.ctrl)
        self.reset()
        self.viewer = None

    def reset(self):
        super(MujocoSimulation, self).reset()
        self.sim.set_state(self.initial_state)

    def run(self, reset=True):
        if reset:
            self.reset()
        self.sim.set_state(self.initial_state)
        for i in range(self.total_time):
            self.sim.data.ctrl[:] = self.ctrl_array[i]
            self.sim.step()
            self.trajectory.append(self.sim.get_state())
        self.alreadyRun = True

    def get_trajectory(self, all_info=True):
        if not self.alreadyRun:
            self.run()
        if all_info:
            return self.trajectory.copy()
        else:
            return [x.qpos for x in self.trajectory]

    def get_recording(self, all_info=True):
        traj = self.get_trajectory(all_info=all_info)
        return traj[self.recording_time[0]:self.recording_time[1]]

    def watch(self, repeat_count=4):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        for _ in range(repeat_count):
            self.sim.set_state(self.initial_state)
            for i in range(self.total_time):
                self.sim.data.ctrl[:] = self.ctrl_array[i]
                self.sim.step()
                self.viewer.render()
        self.run(
            reset=False
        )  # so that the trajectory will be compatible with what user watches
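A usage sketch under the assumption that the Simulation base class provides a ctrl_array of per-step control inputs (as run() implies); the model name and control values below are placeholders.

import numpy as np

sim = MujocoSimulation("pendulum", total_time=200, recording_time=[0, 200])
sim.ctrl_array = 0.1 * np.ones((200, sim.input_size))   # constant control signal
qpos_traj = sim.get_trajectory(all_info=False)          # list of qpos arrays
sim.watch(repeat_count=1)                               # replay in the viewer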
Example #13
class BaseMujocoHexRobot(HexRobot):
    """The hexapod robot in mujoco. Child classes need to define the joint names."""
    @classmethod
    @abc.abstractproperty
    def joint_names(cls) -> Tuple[str, ...]:
        """The names of the joints in the mujoco xml."""
        raise NotImplementedError

    def __init__(self, load_path: str, viewer: bool = True):
        """Initialize a hex robot in mujoco.

        Parameters
        ----------
        load_path : str
            Path to the xml file.
        viewer : bool
            Flag to create a MjViewer for the robot. By default a viewer will be created.
        """
        model = load_model_from_path(load_path)
        self._sim = MjSim(model)

        self.num_joints = len(self.joint_names)

        self._joint_qpos_ids = [
            self._sim.model.get_joint_qpos_addr(x) for x in self.joint_names
        ]
        self._joint_qvel_ids = [
            self._sim.model.get_joint_qvel_addr(x) for x in self.joint_names
        ]
        self._joint_actuator_id_map = dict(
            zip(self.joint_names, range(self.num_joints)))

        if viewer:
            self._viewer = MjViewer(self._sim)
        else:
            self._viewer = None

    def get_state(self) -> HexState:
        """Retrieve the current state of the robot.

        Returns
        -------
        HexState
            The current state of the robot.
        """
        sim_state = self._sim.get_state()
        joint_pos = sim_state.qpos[self._joint_qpos_ids]
        joint_vel = sim_state.qvel[self._joint_qvel_ids]
        return HexState(
            qpos=OrderedDict(zip(self.joint_names, joint_pos)),
            qvel=OrderedDict(zip(self.joint_names, joint_vel)),
            acceleration=np.zeros(3),
        )  # TODO get the acceleration.

    def set_joint_positions(self, joints: Dict[str, float]) -> None:
        """Sets the joint positions to the specified values

        Parameters
        ----------
        joints : Dict[str, float]
            A mapping between the joint name and its value to be set.
        """
        sim_state = self._sim.get_state()
        for name, value in joints.items():
            joint_id = self._sim.model.get_joint_qpos_addr(name)
            sim_state.qpos[joint_id] = value
        self._sim.set_state(sim_state)
        self._sim.forward()

    def step(self) -> None:
        """Steps the simulation forward by one step.

        Updates the visualizer if one is available.
        """
        self._sim.step()
        if self._viewer is not None:
            self._viewer.render()

    def set_command(self, command: Dict[str, float]) -> None:
        """Set the command of the robot.

        Parameters
        ----------
        command : Dict[str, float]
            The commands to the robot.
        """
        for name, value in command.items():
            self._sim.data.ctrl[self._joint_actuator_id_map[name]] = value
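A minimal concrete subclass sketch; overriding the abstract joint_names with a plain class attribute is enough for the base class at runtime, and the joint names and XML path below are placeholders that must match the actual hexapod model.

class TripodLegRobot(BaseMujocoHexRobot):
    # Placeholder joint names; they must match joints defined in the XML.
    joint_names = ("leg1_coxa", "leg1_femur", "leg1_tibia")

robot = TripodLegRobot("hexapod.xml", viewer=False)
robot.set_joint_positions({"leg1_coxa": 0.2})
robot.set_command({"leg1_femur": 0.1})
robot.step()
print(robot.get_state().qpos)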
Example #14
class MujocoEnv(gym.Env):
    def __init__(self,
                 model_path,
                 frame_skip=1,
                 action_noise=0.0,
                 random_init_state=True):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)

        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        self.init_qacc = self.data.qacc.ravel().copy()
        self.init_ctrl = self.data.ctrl.ravel().copy()

        self.qpos_dim = self.init_qpos.size
        self.qvel_dim = self.init_qvel.size
        self.ctrl_dim = self.init_ctrl.size

        self.action_noise = action_noise
        self.random_init_state = random_init_state
        """
        if "init_qpos" in self.model.numeric_names:
            init_qpos_id = self.model.numeric_names.index("init_qpos")
            addr = self.model.numeric_adr.flat[init_qpos_id]
            size = self.model.numeric_size.flat[init_qpos_id]
            init_qpos = self.model.numeric_data.flat[addr:addr + size]
            self.init_qpos = init_qpos

        """
        self.dcom = None
        self.current_com = None
        self.reset()
        super(MujocoEnv, self).__init__()

    @property
    def action_space(self):
        bounds = self.model.actuator_ctrlrange.copy()
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        return spaces.Box(lb, ub)

    @property
    def observation_space(self):
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        return spaces.Box(ub * -1, ub)

    @property
    def action_bounds(self):
        return self.action_space.low, self.action_space.high

    def reset_mujoco(self, init_state=None):
        if init_state is None:
            if self.random_init_state:
                qp = self.init_qpos.copy() + \
                     np.random.normal(size=self.init_qpos.shape) * 0.01
                qv = self.init_qvel.copy() + \
                     np.random.normal(size=self.init_qvel.shape) * 0.1
            else:
                qp = self.init_qpos.copy()
                qv = self.init_qvel.copy()

            qacc = self.init_qacc.copy()
            ctrl = self.init_ctrl.copy()

        else:
            # Restoring from a packed init_state vector is disabled below;
            # fall back to the recorded initial configuration so qp/qv are defined.
            qp = self.init_qpos.copy()
            qv = self.init_qvel.copy()
            """
            start = 0
            for datum_name in ["qpos", "qvel", "qacc", "ctrl"]:
                datum = getattr(self.data, datum_name)
                datum_dim = datum.shape[0]
                datum = init_state[start: start + datum_dim]
                setattr(self.model.data, datum_name, datum)
                start += datum_dim
            """
        self.set_state(qp, qv)

    def reset(self, init_state=None):

        # self.reset_mujoco(init_state)
        self.sim.reset()
        self.sim.forward()

        self.current_com = self.data.subtree_com[0]
        self.dcom = np.zeros_like(self.current_com)

        return self.get_current_obs()

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.qpos_dim, ) and qvel.shape == (
            self.qvel_dim, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.data.qpos,
            self.data.qvel,
            self.data.qacc,
            self.data.ctrl,
        ]).ravel()

    def inject_action_noise(self, action):
        # generate action noise
        noise = self.action_noise * \
                np.random.normal(size=action.shape)
        # rescale the noise to make it proportional to the action bounds
        lb, ub = self.action_bounds
        noise = 0.5 * (ub - lb) * noise
        return action + noise

    def forward_dynamics(self, action):
        ctrl = self.inject_action_noise(action)
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]

        for _ in range(self.frame_skip):
            self.sim.step()

        new_com = self.data.subtree_com[0]
        self.dcom = new_com - self.current_com
        self.current_com = new_com

    def get_viewer(self, config=None):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            # self.viewer.start()
            # self.viewer.set_model(self.model)
        if config is not None:
            pass
            # self.viewer.set_window_pose(config["xpos"], config["ypos"])
            # self.viewer.set_window_size(config["width"], config["height"])
            # self.viewer.set_window_title(config["title"])
        return self.viewer

    def render(self, close=False, mode='human', config=None):
        if mode == 'human':
            # viewer = self.get_viewer(config=config)
            try:
                self.viewer.render()

            except:
                self.get_viewer(config=config)
                self.viewer.render()
        elif mode == 'rgb_array':
            viewer = self.get_viewer(config=config)
            viewer.loop_once()
            # self.get_viewer(config=config).render()
            data, width, height = self.get_viewer(config=config).get_image()
            return np.frombuffer(data,
                                 dtype='uint8').reshape(height, width,
                                                        3)[::-1, :, :]
        if close:
            self.viewer = None  # stop_viewer() is commented out below, so just drop the handle

    # def start_viewer(self):
    #     viewer = self.get_viewer()
    #     if not viewer.running:
    #         viewer.start()
    #
    # def stop_viewer(self):
    #     if self.viewer:
    #         self.viewer.finish()
    #         self.viewer = None

    # def release(self):
    #     # temporarily alleviate the issue (but still some leak)
    #     from learning_to_adapt.mujoco_py.mjlib import mjlib
    #     mjlib.mj_deleteModel(self.model._wrapped)
    #     mjlib.mj_deleteData(self.data._wrapped)

    def get_body_xmat(self, body_name):
        idx = self.model.body_names.index(body_name)
        return self.data.ximat[idx].reshape((3, 3))

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def get_body_comvel(self, body_name):
        idx = self.model.body_names.index(body_name)

        ## _compute_subtree
        body_vels = np.zeros((self.model.nbody, 6))
        # bodywise quantities
        mass = self.model.body_mass.flatten()
        for i in range(self.model.nbody):
            # body velocity
            # Compute object 6D velocity in object-centered frame, world/local orientation.
            # mj_objectVelocity(const mjModel* m, const mjData* d, int objtype, int objid, mjtNum* res, int flg_local)
            mujoco_py.cymj._mj_objectVelocity(self.model, self.data, 1, i,
                                              body_vels[i], 0)
        lin_moms = body_vels[:, 3:] * mass.reshape((-1, 1))

        # init subtree mass
        body_parentid = self.model.body_parentid
        # subtree com and com_vel
        for i in range(self.model.nbody - 1, -1, -1):
            if i > 0:
                parent = body_parentid[i]
                # add scaled velocities
                lin_moms[parent] += lin_moms[i]
                # accumulate mass
                mass[parent] += mass[i]
        return_ = lin_moms / mass.reshape((-1, 1))
        return return_[idx]

        # return self.model.body_comvels[idx]

    # def get_body_comvel(self, body_name):
    #     idx = self.model.body_names.index(body_name)
    #
    #     return self.model.body_comvels[idx]

    # def print_stats(self):
    #     super(MujocoEnv, self).print_stats()
    #     print("qpos dim:\t%d" % len(self.data.qpos))

    def action_from_key(self, key):
        raise NotImplementedError

    # def set_state_tmp(self, state, restore=True):
    #     if restore:
    #         prev_pos = self.data.qpos
    #         prev_qvel = self.data.qvel
    #         prev_ctrl = self.data.ctrl
    #         prev_act = self.data.act
    #     qpos, qvel = self.decode_state(state)
    #     self.model.data.qpos = qpos
    #     self.model.data.qvel = qvel
    #     self.model.forward()
    #     yield
    #     if restore:
    #         self.data.qpos = prev_pos
    #         self.data.qvel = prev_qvel
    #         self.data.ctrl = prev_ctrl
    #         self.data.act = prev_act
    #         self.model.forward()

    def get_param_values(self):
        return {}

    def set_param_values(self, values):
        pass
Example #15
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip=1, xml_string=""):
        """
        @param model_path path of the default model
        @param xml_string if given, the model will be reset using these values
        """

        self.model_path = model_path  # kept so set_params() can re-run __init__
        fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.model = load_model_from_path(fullpath)
        with open(fullpath, 'r') as f:
            self.model_xml = f.read()
            self.default_xml = self.model_xml

        if xml_string != "":
            self.model = load_model_from_xml(xml_string)
            self.model_xml = xml_string

        self.frame_skip = frame_skip

        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([
            o.size for o in observation
        ]) if type(observation) is tuple else observation.size

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()
        self.set_param_space()

    def get_params(self):
        """
        Returns a dict of (param_name, param_value)
        """
        return MujocoUpdater(self.model_xml).get_params()

    def set_params(self, params):
        """
        @param params: dict(param_name, param_value)
        param_name should be a string of bodyname__type__paramname
        where type is either geom or joint,
        e.g. thigh__joint__friction,
        and param_value is a numpy array
        """
        # invalidate cached properties
        self.__dict__.pop('action_space', None)
        self.__dict__.pop('observation_space', None)

        new_xml = MujocoUpdater.set_params(self.model_xml, params)
        self.__init__(self.model_path, xml_string=new_xml)
        self.reset()
        return self

    def set_param_space(self, param_space=None, eps_scale=0.5, replace=True):
        """
        Set param_space
        @param param_space: dict(string, gym.space.base.Space)
        @param eps_scale: scale of variation applied to all params
        @param replace: if true, param_space overwrites default param_space.
                        Default behavior is to merge.
        """
        if param_space is not None:
            if replace:
                self._param_space = param_space
            else:
                self._param_space = {**self._param_space, **param_space}
        else:
            params = MujocoUpdater(self.model_xml).get_params()
            self._param_space = dict()
            for param, value in params.items():
                eps = np.abs(value) * eps_scale
                ub = value + eps
                lb = value - eps
                for name in positive_params:
                    if name in param:
                        lb = np.clip(lb, 1e-3, ub)
                        break
                space = spaces.Box(lb, ub)
                self._param_space[param] = space

    def get_geom_params(self, body_name):
        geom = MujocoUpdater(self.model_xml).get_geom(body_name)
        return {
            k: v
            for k, v in geom.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_joint_params(self, body_name):
        joint = MujocoUpdater(self.model_xml).get_joint(body_name)
        return {
            k: v
            for k, v in joint.attrib.items()
            if k not in MujocoUpdater.ignore_params
        }

    def get_body_names(self):
        return MujocoUpdater(self.model_xml).get_body_names()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_current_obs(self):
        return self._get_full_obs()

    def _get_full_obs(self):
        data = self.sim.data
        cdists = np.copy(self.model.geom_margin).flat
        for c in self.sim.data.contact:
            cdists[c.geom2] = min(cdists[c.geom2], c.dist)
        obs = np.concatenate([
            data.qpos.flat,
            data.qvel.flat,
            # data.cdof.flat,
            data.cinert.flat,
            data.cvel.flat,
            # data.cacc.flat,
            data.qfrc_actuator.flat,
            data.cfrc_ext.flat,
            data.qfrc_constraint.flat,
            cdists,
            # data.qfrc_bias.flat,
            # data.qfrc_passive.flat,
            self.dcom.flat,
        ])
        return obs

    @property
    def _state(self):
        return np.concatenate(
            [self.sim.data.qpos.flat, self.sim.data.qvel.flat])

    @property
    def _full_state(self):
        return np.concatenate([
            self.sim.data.qpos,
            self.sim.data.qvel,
            self.sim.data.qacc,
            self.sim.data.ctrl,
        ]).ravel()

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def reset(self, randomize=True):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    # Added for bayesian_rl
    def get_sim_state(self):
        return self.sim.get_state()

    # Added for bayesian_rl
    def set_sim_state(self, state):
        self.sim.set_state(state)

    # Added for bayesian_rl
    def set_state_vector(self, state_vector):
        qpos = state_vector[:self.model.nq]
        qvel = state_vector[self.model.nq:]
        self.set_state(qpos, qvel)

    # Added for bayesian_rl
    def get_state_vector(self):
        return self.state_vector()

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}

    # def step(self, a):
    #     return self._step(a)

    # Added for bayesian_rl
    def take_action(self, a):
        self.step(a)
        return self.get_sim_state()

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['mean']
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=640,
                                             height=480,
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + '_' + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))

    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        if mode == 'rgb_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width,
                                                      height,
                                                      depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()
Example #16
class DeterministicGraspPolicy(Policy):
    def __init__(self, agentparams, policyparams):
        Policy.__init__(self)
        self.agentparams = agentparams
        self.min_lift = agentparams.get('min_z_lift', 0.05)

        self.policyparams = policyparams
        self.adim = agentparams['adim']
        self.n_actions = policyparams['nactions']

        if 'num_samples' in self.policyparams:
            self.M = self.policyparams['num_samples']
        else:
            self.M = 20  # number of CEM Samples

        if 'best_to_take' in self.policyparams:
            self.K = self.policyparams['best_to_take']
        else:
            self.K = 5  # best samples to average for next sampling

        assert self.adim >= 4, 'must have at least x, y, z + gripper actions'

        self.moveto = True
        self.drop = False
        self.grasp = False
        self.lift = False

        if 'iterations' in self.policyparams:
            self.niter = self.policyparams['iterations']
        else:
            self.niter = 10  # number of iterations
        self.imgs = []
        self.iter = 0

    def setup_CEM_model(self, t, init_model):
        if t == 0:
            if 'gen_xml' in self.agentparams:

                self.CEM_model = MjSim(
                    load_model_from_path(self.agentparams['gen_xml_fname']))
            else:
                self.CEM_model = MjSim(
                    load_model_from_path(self.agentparams['filename']))

        self.initial_qpos = init_model.data.qpos.copy()
        self.initial_qvel = init_model.data.qvel.copy()

        self.reset_CEM_model()

    def reset_CEM_model(self):
        if len(self.imgs) > 0:
            print('saving iter', self.iter, 'with frames:', len(self.imgs))
            npy_to_gif(
                self.imgs,
                os.path.join(self.agentparams['record'],
                             'iter_{}'.format(self.iter)))
            self.iter += 1

        sim_state = self.CEM_model.get_state()
        sim_state.qpos[:] = self.initial_qpos.copy()
        sim_state.qvel[:] = self.initial_qvel.copy()
        self.CEM_model.set_state(sim_state)

        self.prev_target = self.CEM_model.data.qpos[:self.adim].squeeze().copy()
        self.target = self.CEM_model.data.qpos[:self.adim].squeeze().copy()

        for _ in range(5):
            self.step_model(self.target)

        self.imgs = []

    def viewer_refresh(self):
        if 'debug_viewer' in self.policyparams and self.policyparams[
                'debug_viewer']:
            large_img = self.CEM_model.render(
                640, 480, camera_name="maincam")[::-1, :, :]
            self.imgs.append(large_img)

    def perform_CEM(self, targetxy):
        self.reset_CEM_model()

        if 'object_meshes' in self.agentparams:
            targetxy = self.CEM_model.data.sensordata[:2].squeeze().copy()
        print('Beginning CEM')
        ang_dis_mean = self.policyparams['init_mean'].copy()
        ang_dis_cov = self.policyparams['init_cov'].copy()
        scores = np.zeros(self.M)
        best_score, best_ang, best_xy = -1, None, None

        for n in range(self.niter):
            ang_disp_samps = np.random.multivariate_normal(ang_dis_mean,
                                                           ang_dis_cov,
                                                           size=self.M)

            for s in range(self.M):
                #print('On iter', n, 'sample', s)
                self.reset_CEM_model()
                move = True
                drop = False
                grasp = False
                g_start = 0
                lift = False

                angle_delta = ang_disp_samps[s, 0]
                targetxy_delta = targetxy + ang_disp_samps[s, 1:]
                print('init iter')
                print(targetxy)
                print(angle_delta, targetxy_delta)
                for t in range(self.n_actions):
                    angle_action = np.zeros(self.adim)
                    cur_xy = self.CEM_model.data.qpos[:2].squeeze()

                    if move and np.linalg.norm(
                            targetxy_delta - cur_xy,
                            2) <= self.policyparams['drop_thresh']:
                        move = False
                        drop = True

                    if drop and self.CEM_model.data.qpos[2] <= -0.079:
                        drop = False
                        grasp = True
                        g_start = t
                    if grasp and t - g_start > 2:
                        grasp = False
                        lift = True
                    if move:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = self.agentparams['ztarget']
                        angle_action[3] = angle_delta
                        angle_action[4] = -100
                    elif drop:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = -0.08
                        angle_action[3] = angle_delta
                        angle_action[4] = -100
                    elif grasp:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = -0.08
                        angle_action[3] = angle_delta
                        angle_action[4] = 21
                    elif lift:
                        angle_action[:2] = targetxy_delta
                        angle_action[2] = self.agentparams['ztarget']
                        angle_action[3] = angle_delta
                        angle_action[4] = 21

                    self.step_model(angle_action)
                # print 'final z', self.CEM_model.data.qpos[8].squeeze(), 'with angle', angle_samps[s]

                if 'object_meshes' in self.agentparams:
                    obj_z = self.CEM_model.data.sensordata[2].squeeze()
                else:
                    obj_z = self.CEM_model.data.qpos[8].squeeze()

                print('obj_z at iter', n * self.M + s, 'is', obj_z)

                scores[s] = obj_z

                if 'stop_iter_thresh' in self.policyparams and scores[
                        s] > self.policyparams['stop_iter_thresh']:
                    return ang_disp_samps[s, 0], ang_disp_samps[s, 1:]
                # print 'score',scores[s]

            best_scores = np.argsort(-scores)[:self.K]

            if scores[best_scores[0]] > best_score or best_ang is None:

                #print('best', scores[best_scores[0]])

                best_score = scores[best_scores[0]]
                best_ang = ang_disp_samps[best_scores[0], 0]
                best_xy = ang_disp_samps[best_scores[0], 1:]

            ang_dis_mean = np.mean(ang_disp_samps[best_scores, :], axis=0)
            ang_dis_cov = np.cov(ang_disp_samps[best_scores, :].T)

        return best_ang, best_xy

    def step_model(self, input_actions):
        pos_clip = self.agentparams['targetpos_clip']

        self.prev_target = self.target.copy()
        self.target = input_actions.copy()
        self.target = np.clip(self.target, pos_clip[0], pos_clip[1])

        for s in range(self.agentparams['substeps']):
            step_action = s / float(self.agentparams['substeps']) * (
                self.target - self.prev_target) + self.prev_target
            self.CEM_model.data.ctrl[:] = step_action
            self.CEM_model.step()

        self.viewer_refresh()

        #print "end", self.CEM_model.data.qpos[:4].squeeze()

    def act(self,
            traj,
            t,
            init_model=None,
            goal_object_pose=None,
            hyperparams=None,
            goal_image=None):
        """
        Scripted pick->place->wiggle trajectory. There's probably a better way to script this
        but booleans will suffice for now.
        """
        self.setup_CEM_model(t, init_model)

        if t == 0:
            self.moveto = True
            self.drop = False
            self.lift = False
            self.grasp = False
            self.second_moveto = False
            self.final_drop = False
            self.wiggle = False
            self.switchTime = 0
            print('start pose', traj.Object_pose[t, 0, :3])
            self.targetxy = traj.Object_pose[t, 0, :2]
            self.angle, self.disp = self.perform_CEM(self.targetxy)
            print('best angle', self.angle, 'best target', self.targetxy)
            self.targetxy += self.disp
            traj.desig_pos = np.zeros((2, 2))
            traj.desig_pos[0] = self.targetxy.copy()

        if self.lift and traj.X_full[t, 2] >= self.min_lift:
            self.lift = False

            if traj.Object_full_pose[t, 0, 2] > -0.01:  # lift occurred
                self.second_moveto = True
                self.targetxy = np.random.uniform(-0.2, 0.2, size=2)
                print("LIFTED OBJECT!")
                print('dropping at', self.targetxy)
                traj.desig_pos[1] = self.targetxy.copy()
            else:
                self.wiggle = True

        if self.grasp and self.switchTime > 0:
            print('lifting at time', t, '!', 'have z', traj.X_full[t, 2])

            self.grasp = False
            self.lift = True

        if self.drop and (traj.X_full[t, 2] <= -0.079 or self.switchTime >= 2):
            print('grasping at time', t, '!', 'have z', traj.X_full[t, 2])
            self.drop = False
            self.grasp = True
            self.switchTime = 0

        if self.moveto and (np.linalg.norm(traj.X_full[t, :2] - self.targetxy,
                                           2) <=
                            self.policyparams['drop_thresh']):
            if self.switchTime > 0:
                print('stopping moveto at time', t, '!')
                print(traj.X_full[t, :2], self.targetxy)
                self.moveto = False
                self.drop = True
                self.switchTime = 0
            else:
                self.switchTime += 1

        if self.second_moveto and np.linalg.norm(
                traj.X_full[t, :2] - self.targetxy,
                2) <= self.policyparams['drop_thresh']:
            self.second_moveto = False
            self.final_drop = True
            self.switchTime = 0

        actions = np.zeros(self.adim)
        if self.moveto or self.second_moveto:
            delta = np.zeros(3)
            delta[:2] = self.targetxy - traj.target_qpos[t, :2]

            if 'xyz_std' in self.policyparams:
                delta += self.policyparams['xyz_std'] * np.random.normal(
                    size=3)

            norm = np.sqrt(np.sum(np.square(delta)))
            if norm > self.policyparams['max_norm']:
                delta *= self.policyparams['max_norm'] / norm

            actions[:3] = traj.target_qpos[t, :3] + delta
            actions[2] = self.agentparams['ztarget']
            actions[3] = self.angle

            if self.moveto:
                actions[-1] = -1
            else:
                actions[-1] = 1

            self.switchTime += 1

        elif self.drop:
            actions[:2] = self.targetxy
            actions[2] = -0.08
            actions[3] = self.angle
            actions[-1] = -1
            self.switchTime += 1

        elif self.lift:
            actions[:2] = self.targetxy
            actions[2] = self.agentparams['ztarget']
            actions[3] = self.angle
            actions[-1] = 1

        elif self.grasp:
            actions[:2] = self.targetxy
            actions[2] = -0.08
            actions[3] = self.angle
            actions[-1] = 1
            self.switchTime += 1
        elif self.final_drop:

            if self.switchTime > 0:
                print('opening')
                actions[:2] = self.targetxy
                actions[2] = -0.08
                actions[3] = self.angle
                actions[-1] = -1
                self.switchTime += 1

            if self.switchTime > 1:
                print('up')
                actions[:2] = self.targetxy
                actions[2] = self.agentparams['ztarget'] + np.random.uniform(
                    -0.03, 0.05)
                actions[3] = self.angle + np.random.uniform(
                    -np.pi / 4., np.pi / 4.)
                actions[-1] = -1
                self.final_drop = False
                self.wiggle = True
                self.switchTime = 0
            else:
                actions[:2] = self.targetxy
                actions[2] = -0.08
                actions[3] = self.angle
                actions[-1] = -1
                self.switchTime += 1

        elif self.wiggle:
            delta_vec = np.random.normal(size=2)
            norm = np.sqrt(np.sum(np.square(delta_vec)))
            if norm > self.policyparams['max_norm']:
                delta_vec *= self.policyparams['max_norm'] / norm
            actions[:2] = np.clip(traj.target_qpos[t, :2] + delta_vec, -0.15,
                                  0.15)
            delta_z = np.random.uniform(-0.08 - traj.target_qpos[t, 2],
                                        0.3 - traj.target_qpos[t, 2])
            actions[2] = traj.target_qpos[t, 2] + delta_z
            actions[3] = traj.target_qpos[t, 3] + np.random.uniform(-0.1, 0.1)
            if np.random.uniform() > 0.5:
                actions[4] = -1
            else:
                actions[4] = 1

        if 'angle_std' in self.policyparams:
            actions[3] += self.policyparams['angle_std'] * np.random.normal()

        return actions - traj.target_qpos[t, :] * traj.mask_rel
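        # Note on the return value (an observation, not original code): dimensions
        # flagged in traj.mask_rel come back as deltas relative to traj.target_qpos[t],
        # while the remaining dimensions are absolute targets. Under that assumed
        # convention a caller could recover the absolute command as
        #     action + traj.target_qpos[t, :] * traj.mask_rel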
Example #17
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self, model_path, frame_skip):
        print(model_path)
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = mujoco_py.load_model_from_path(fullpath)
        self.timestep = self.model.opt.timestep * self.frame_skip
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None

        # Set the action space here because self.action_space may already be
        # needed on the first call to self._step() below.
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = observation.size

        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized and after every reset
        Optionally implement this method, if you need to tinker with camera position
        and so forth.
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        ob = self.reset_model()
        if self.viewer is not None:
            # TODO:
            # self.viewer.autoscale()
            self.viewer_setup()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        sim_state = self.sim.get_state()
        sim_state.qpos[:] = qpos[:]
        sim_state.qvel[:] = qvel[:]
        self.sim.set_state(sim_state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        assert len(ctrl) == len(self.sim.data.ctrl)
        self.data.ctrl[:] = ctrl[:]
        for _ in range(n_frames):
            self.sim.step()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer = None
            return

        if mode == 'rgb_array':
            data = self._get_viewer()._read_pixels_as_in_window()
            return data
        elif mode == 'human':
            self._get_viewer().render()
            return

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def get_body_com(self, body_name):
        return self.sim.data.get_body_xipos(body_name)

    def get_body_comvel(self, body_name):
        return self.sim.data.get_body_cvel(body_name)

    def get_body_xmat(self, body_name):
        return self.sim.data.get_body_xmat(body_name).reshape((3, 3))

    def state_vector(self):
        return np.concatenate([
            self.data.qpos.flat,
            self.data.qvel.flat
        ])
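
# A minimal sketch of a hypothetical subclass of the MujocoEnv above, showing which
# methods a concrete environment is expected to fill in (the XML name, reward, and
# observation are illustrative assumptions, not part of the original class):
class PointMassEnv(MujocoEnv):
    def __init__(self):
        # "point.xml" is a placeholder asset name
        MujocoEnv.__init__(self, "point.xml", frame_skip=2)

    def _step(self, action):
        # advance the simulation and return (obs, reward, done, info) in the old gym API
        self.do_simulation(action, self.frame_skip)
        ob = self.state_vector()
        reward = -float(np.square(action).sum())  # placeholder control cost
        return ob, reward, False, {}

    def reset_model(self):
        # restore the initial joint configuration captured in __init__
        self.set_state(self.init_qpos, self.init_qvel)
        return self.state_vector()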
Example #18
model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
viewer = MjViewer(sim)

states = [{'box:x': +0.8, 'box:y': +0.8},
          {'box:x': -0.8, 'box:y': +0.8},
          {'box:x': -0.8, 'box:y': -0.8},
          {'box:x': +0.8, 'box:y': -0.8},
          {'box:x': +0.0, 'box:y': +0.0}]

# MjModel.get_joint_qpos_addr returns the starting index of a joint's
# value in MjData.qpos.
x_joint_i = sim.model.get_joint_qpos_addr("box:x")
y_joint_i = sim.model.get_joint_qpos_addr("box:y")

print_box_xpos(sim)

while True:
    for state in states:
        sim_state = sim.get_state()
        sim_state.qpos[x_joint_i] = state["box:x"]
        sim_state.qpos[y_joint_i] = state["box:y"]
        sim.set_state(sim_state)
        sim.forward()
        print("updated state to", state)
        print_box_xpos(sim)
        viewer.render()

    if os.getenv('TESTING') is not None:
        break
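
# The helper print_box_xpos used above is not shown in this snippet. A minimal
# version consistent with how it is called (an assumption, not the original
# definition) would be:
def print_box_xpos(sim):
    # world-frame Cartesian position of the body named "box"
    print("box xpos:", sim.data.get_body_xpos("box"))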
Example #19
class RobotSimulatorMuJoCo:
    def __init__(self, config_file_name, config_folder_path=''):
        # reading config file and mujoco general info
        config_data = util.read_config_file(config_file_name,config_folder_path)
        model_xml = config_data['MuJoCo']['model_xml']
        model_xml_path = os.path.join(os.environ[util.SIM_ENV_VAR],'xml/',model_xml)
        model = load_model_from_path(model_xml_path)
        self.sim = MjSim(model)
        self.render = config_data['MuJoCo']['render']
        # create robot and environment
        self.robot = Robot(config_data,self.sim.model)
        self.environment = Environment(config_data,self.sim.model)
        # simulator parameters
        self.param = {
            "mj_xml": model_xml_path,
            "total_time_limit": float('Inf')
            }
        # initialize and update from the configuration file
        self.initialize_from_config(config_data)
        # initialize render
        self.init_render()

    def initialize_from_config(self, config_data):
        # extract the init states from the configuration file and assign them to init_state
        init_state = self.sim.get_state()
        if not config_data.has_option('Robot','ignore_init'):
            init_state = self.robot.read_init_state_from_config(init_state)
        if not config_data.has_option('Environment','ignore_init'):
            init_state = self.environment.read_init_state_from_config(init_state)
        # set the init_state to the simulator
        self.sim.set_state(init_state)
        self.sim.forward()
        print("Simulator init states are set.")
        self.update_state()
        self.robot.enable_actuator()
        # update total time to stop the simulation if specified
        total_time_limit = config_data.getfloat('MuJoCo', 'total_time_limit')
        if (total_time_limit>0):
            self.param.update({'total_time_limit': total_time_limit})
        # update the experiment name and the sample ID if defined
        self.param.update(record_sampling_decimation=1)         
        self.param.update(record_sampling_idx=1)
        if (config_data.has_section('Uncertainty')):
            current_sample_id = config_data['Uncertainty']['current_sample_id']
            if (int(current_sample_id)>0):
                self.param.update({ 
                    'experiment_name' : config_data['Uncertainty']['experiment_name'],
                    'current_sample_id': current_sample_id
                    })
            if ('record_sampling_decimation' in config_data['Uncertainty']):
                self.param.update(
                    record_sampling_decimation=int(config_data['Uncertainty']['record_sampling_decimation']))

    def init_render(self):
        # [optional] init render
        if (self.render=="yes"):
            self.viewer = MjViewer(self.sim)
            print("viewer initialized")

    def step(self):
        self.sim.step()
        if (self.render=="yes"):
            self.viewer.render()

    def update_state(self):
        current_state = self.sim.get_state()
        self.robot.get_joint(current_state)
        self.environment.get_joint(current_state)

    def update_robot_control(self):
        ctrl_pos = self.robot.compute_control()
        for i, joint_name in enumerate(ctrl_pos):
            self.sim.data.ctrl[i]=ctrl_pos[i]

    def record_state(self):
        if (self.param['record_sampling_idx']==self.param['record_sampling_decimation']):
            self.robot.record_joint()
            self.environment.record_joint()
            self.param.update(record_sampling_idx=1)
        else:
            self.param.update(record_sampling_idx=self.param['record_sampling_idx']+1)

    def check_time_limit(self):
        if (self.sim.data.time<self.param['total_time_limit']):
            return False
        else:
            return True

    def dump_data(self):
        # This function dumps the recorded data only; the model can be reloaded from the simulation XML.
        if ('experiment_name' in self.param.keys()):
            # prepare save path
            export_folder_path = os.path.join(os.environ[util.SIM_DATA_ENV_VAR],
                self.param['experiment_name'],'data')
            if (not os.path.isdir(export_folder_path)):
                os.makedirs(export_folder_path)
            # save the simulator object without the MuJoCo model
            self.sim=[]
            file_name  = os.path.join(export_folder_path,self.param['current_sample_id']+'.joblib')
            joblib.dump(self, file_name)
            print(f'Simulation result saved at [{file_name}]')
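
# A minimal sketch of how this simulator class might be driven (the config file name
# and the exact call ordering are assumptions based on the methods above, not code
# from the original repository):
if __name__ == '__main__':
    simulator = RobotSimulatorMuJoCo('example_sim_config.cfg')
    while not simulator.check_time_limit():
        simulator.update_state()          # refresh robot/environment joint readings
        simulator.update_robot_control()  # write position targets into sim.data.ctrl
        simulator.record_state()          # log at the configured decimation
        simulator.step()                  # advance physics (and render if enabled)
    simulator.dump_data()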
Example #20
class World:
    # Default configuration (this should not be nested since it gets copied)
    # *NOTE:* Changes to this configuration should also be reflected in `Engine` configuration
    DEFAULT = {
        'robot_base': 'xmls/car.xml',  # Which robot XML to use as the base
        'robot_xy': np.zeros(2),  # Robot XY location
        'robot_rot': 0,  # Robot rotation about Z axis
        'floor_size': [3.5, 3.5, .1],  # Used for displaying the floor

        # Objects -- this is processed and added by the Engine class
        'objects': {},  # map from name -> object dict
        # Geoms -- similar to objects, but they are immovable and fixed in the scene.
        'geoms': {},  # map from name -> geom dict
        # Mocaps -- mocap objects which are used to control other objects
        'mocaps': {},

        # Determine whether we create render contexts
        'observe_vision': False,
    }

    def __init__(self, config={}, render_context=None, render_device_id=-1):
        ''' config - JSON string or dict of configuration.  See self.parse() '''
        self.parse(config)  # Parse configuration
        self.first_reset = True
        self.viewer = None
        self.render_context = render_context
        self.render_device_id = render_device_id
        self.update_viewer_sim = False
        self.robot = Robot(self.robot_base)

    def parse(self, config):
        ''' Parse a config dict - see self.DEFAULT for description '''
        self.config = deepcopy(self.DEFAULT)
        self.config.update(deepcopy(config))
        for key, value in self.config.items():
            assert key in self.DEFAULT, f'Bad key {key}'
            setattr(self, key, value)

    @property
    def data(self):
        ''' Helper to get the simulation data instance '''
        return self.sim.data

    # TODO: remove this when mujoco-py fix is merged and a new version is pushed
    # https://github.com/openai/mujoco-py/pull/354
    # Then all uses of `self.world.get_sensor()` should change to `self.data.get_sensor`.
    def get_sensor(self, name):
        id = self.model.sensor_name2id(name)
        adr = self.model.sensor_adr[id]
        dim = self.model.sensor_dim[id]
        return self.data.sensordata[adr:adr + dim].copy()

    def build(self):
        ''' Build a world, including generating XML and moving objects '''
        # Read in the base XML (contains robot, camera, floor, etc)
        self.robot_base_path = os.path.join(BASE_DIR, self.robot_base)
        with open(self.robot_base_path) as f:
            self.robot_base_xml = f.read()
        self.xml = xmltodict.parse(
            self.robot_base_xml)  # Nested OrderedDict objects

        # Convenience accessor for xml dictionary
        worldbody = self.xml['mujoco']['worldbody']

        # Move robot position to starting position
        worldbody['body']['@pos'] = convert(np.r_[self.robot_xy,
                                                  self.robot.z_height])
        worldbody['body']['@quat'] = convert(rot2quat(self.robot_rot))

        # We need this because xmltodict skips over single-item lists in the tree
        worldbody['body'] = [worldbody['body']]
        if 'geom' in worldbody:
            worldbody['geom'] = [worldbody['geom']]
        else:
            worldbody['geom'] = []

        # Add equality section if missing
        if 'equality' not in self.xml['mujoco']:
            self.xml['mujoco']['equality'] = OrderedDict()
        equality = self.xml['mujoco']['equality']
        if 'weld' not in equality:
            equality['weld'] = []

        # Add asset section if missing
        if 'asset' not in self.xml['mujoco']:
            # old default rgb1: ".4 .5 .6"
            # old default rgb2: "0 0 0"
            # light pink: "1 0.44 .81"
            # light blue: "0.004 0.804 .996"
            # light purple: ".676 .547 .996"
            # med blue: "0.527 0.582 0.906"
            # indigo: "0.293 0 0.508"
            asset = xmltodict.parse('''
                <asset>
                    <texture type="skybox" builtin="gradient" rgb1="0.527 0.582 0.906" rgb2="0.1 0.1 0.35"
                        width="800" height="800" markrgb="1 1 1" mark="random" random="0.001"/>
                    <texture name="texplane" builtin="checker" height="100" width="100"
                        rgb1="0.7 0.7 0.7" rgb2="0.8 0.8 0.8" type="2d"/>
                    <material name="MatPlane" reflectance="0.1" shininess="0.1" specular="0.1"
                        texrepeat="10 10" texture="texplane"/>
                </asset>
                ''')
            self.xml['mujoco']['asset'] = asset['asset']

        # Add light to the XML dictionary
        light = xmltodict.parse('''<b>
            <light cutoff="100" diffuse="1 1 1" dir="0 0 -1" directional="true"
                exponent="1" pos="0 0 0.5" specular="0 0 0" castshadow="false"/>
            </b>''')
        worldbody['light'] = light['b']['light']

        # Add floor to the XML dictionary if missing
        if not any(g.get('@name') == 'floor' for g in worldbody['geom']):
            floor = xmltodict.parse('''
                <geom name="floor" type="plane" condim="6"/>
                ''')
            worldbody['geom'].append(floor['geom'])

        # Make sure floor renders the same for every world
        for g in worldbody['geom']:
            if g['@name'] == 'floor':
                g.update({
                    '@size': convert(self.floor_size),
                    '@rgba': '1 1 1 1',
                    '@material': 'MatPlane'
                })

        # Add cameras to the XML dictionary
        cameras = xmltodict.parse('''<b>
            <camera name="fixednear" pos="0 -2 2" zaxis="0 -1 1"/>
            <camera name="fixedfar" pos="0 -5 5" zaxis="0 -1 1"/>
            <camera name="fixedtop" pos="0 0 5" zaxis="0 0 1"/>
            </b>''')
        worldbody['camera'] = cameras['b']['camera']

        # Build and add a tracking camera (logic needed to ensure orientation correct)
        theta = self.robot_rot
        xyaxes = dict(x1=np.cos(theta),
                      x2=-np.sin(theta),
                      x3=0,
                      y1=np.sin(theta),
                      y2=np.cos(theta),
                      y3=1)
        pos = dict(xp=0 * np.cos(theta) + (-2) * np.sin(theta),
                   yp=0 * (-np.sin(theta)) + (-2) * np.cos(theta),
                   zp=2)
        track_camera = xmltodict.parse('''<b>
            <camera name="track" mode="track" pos="{xp} {yp} {zp}" xyaxes="{x1} {x2} {x3} {y1} {y2} {y3}"/>
            </b>'''.format(**pos, **xyaxes))

        # support multiple cameras on 'body'
        if not isinstance(worldbody['body'][0]['camera'], list):
            worldbody['body'][0]['camera'] = [worldbody['body'][0]['camera']]
        worldbody['body'][0]['camera'].append(track_camera['b']['camera'])

        # Add objects to the XML dictionary
        for name, object in self.objects.items():
            assert object['name'] == name, f'Inconsistent {name} {object}'
            object = object.copy()  # don't modify original object
            object['quat'] = rot2quat(object['rot'])
            if name == 'box':
                dim = object['size'][0]
                object['height'] = object['size'][-1]
                object['width'] = dim / 2
                object['x'] = dim
                object['y'] = dim
                body = xmltodict.parse(
                    '''
                    <body name="{name}" pos="{pos}" quat="{quat}">
                        <freejoint name="{name}"/>
                        <geom name="{name}" type="{type}" size="{size}" density="{density}"
                            rgba="{rgba}" group="{group}"/>
                        <geom name="col1" type="{type}" size="{width} {width} {height}" density="{density}"
                            rgba="{rgba}" group="{group}" pos="{x} {y} 0"/>
                        <geom name="col2" type="{type}" size="{width} {width} {height}" density="{density}"
                            rgba="{rgba}" group="{group}" pos="-{x} {y} 0"/>
                        <geom name="col3" type="{type}" size="{width} {width} {height}" density="{density}"
                            rgba="{rgba}" group="{group}" pos="{x} -{y} 0"/>
                        <geom name="col4" type="{type}" size="{width} {width} {height}" density="{density}"
                            rgba="{rgba}" group="{group}" pos="-{x} -{y} 0"/>
                    </body>
                '''.format(**{k: convert(v)
                              for k, v in object.items()}))
            else:
                body = xmltodict.parse(
                    '''
                    <body name="{name}" pos="{pos}" quat="{quat}">
                        <freejoint name="{name}"/>
                        <geom name="{name}" type="{type}" size="{size}" density="{density}"
                            rgba="{rgba}" group="{group}"/>
                    </body>
                '''.format(**{k: convert(v)
                              for k, v in object.items()}))
            # Append new body to world, making it a list optionally
            # Add the object to the world
            worldbody['body'].append(body['body'])
        # Add mocaps to the XML dictionary
        for name, mocap in self.mocaps.items():
            # Mocap names are suffixed with 'mocap'
            assert mocap['name'] == name, f'Inconsistent {name} {mocap}'
            assert name.replace(
                'mocap', 'obj') in self.objects, f'missing object for {name}'
            # Add the object to the world
            mocap = mocap.copy()  # don't modify original object
            mocap['quat'] = rot2quat(mocap['rot'])
            body = xmltodict.parse('''
                <body name="{name}" mocap="true">
                    <geom name="{name}" type="{type}" size="{size}" rgba="{rgba}"
                        pos="{pos}" quat="{quat}" contype="0" conaffinity="0" group="{group}"/>
                </body>
            '''.format(**{k: convert(v)
                          for k, v in mocap.items()}))
            worldbody['body'].append(body['body'])
            # Add weld to equality list
            mocap['body1'] = name
            mocap['body2'] = name.replace('mocap', 'obj')
            weld = xmltodict.parse('''
                <weld name="{name}" body1="{body1}" body2="{body2}" solref=".02 1.5"/>
            '''.format(**{k: convert(v)
                          for k, v in mocap.items()}))
            equality['weld'].append(weld['weld'])
        # Add geoms to XML dictionary
        for name, geom in self.geoms.items():
            assert geom['name'] == name, f'Inconsistent {name} {geom}'
            geom = geom.copy()  # don't modify original object
            geom['quat'] = rot2quat(geom['rot'])
            geom['contype'] = geom.get('contype', 1)
            geom['conaffinity'] = geom.get('conaffinity', 1)
            body = xmltodict.parse('''
                <body name="{name}" pos="{pos}" quat="{quat}">
                    <geom name="{name}" type="{type}" size="{size}" rgba="{rgba}" group="{group}"
                        contype="{contype}" conaffinity="{conaffinity}"/>
                </body>
            '''.format(**{k: convert(v)
                          for k, v in geom.items()}))
            # Append new body to world, making it a list optionally
            # Add the object to the world
            worldbody['body'].append(body['body'])

        # Instantiate simulator
        # print(xmltodict.unparse(self.xml, pretty=True))
        self.xml_string = xmltodict.unparse(self.xml)
        self.model = load_model_from_xml(self.xml_string)
        self.sim = MjSim(self.model)

        # Add render contexts to newly created sim
        if self.render_context is None and self.observe_vision:
            render_context = MjRenderContextOffscreen(
                self.sim, device_id=self.render_device_id, quiet=True)
            render_context.vopt.geomgroup[:] = 1
            self.render_context = render_context

        if self.render_context is not None:
            self.render_context.update_sim(self.sim)

        # Recompute simulation intrinsics from new position
        self.sim.forward()

    def rebuild(self, config={}, state=True):
        ''' Build a new sim from a model if the model changed '''
        if state:
            old_state = self.sim.get_state()
        #self.config.update(deepcopy(config))
        #self.parse(self.config)
        self.parse(config)
        self.build()
        if state:
            self.sim.set_state(old_state)
        self.sim.forward()

    def reset(self, build=True):
        ''' Reset the world (sim is accessed through self.sim) '''
        if build:
            self.build()
        # set flag so that renderer knows to update sim
        self.update_viewer_sim = True

    def render(self, mode='human'):
        ''' Render the environment to the screen '''
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            # Turn all the geom groups on
            self.viewer.vopt.geomgroup[:] = 1
            # Set camera if specified
            if mode == 'human':
                self.viewer.cam.fixedcamid = -1
                self.viewer.cam.type = const.CAMERA_FREE
            else:
                self.viewer.cam.fixedcamid = self.model.camera_name2id(mode)
                self.viewer.cam.type = const.CAMERA_FIXED
        if self.update_viewer_sim:
            self.viewer.update_sim(self.sim)
            self.update_viewer_sim = False
        self.viewer.render()

    def robot_com(self):
        ''' Get the position of the robot center of mass in the simulator world reference frame '''
        return self.body_com('robot')

    def robot_pos(self):
        ''' Get the position of the robot in the simulator world reference frame '''
        return self.body_pos('robot')

    def robot_mat(self):
        ''' Get the rotation matrix of the robot in the simulator world reference frame '''
        return self.body_mat('robot')

    def robot_vel(self):
        ''' Get the velocity of the robot in the simulator world reference frame '''
        return self.body_vel('robot')

    def body_com(self, name):
        ''' Get the center of mass of a named body in the simulator world reference frame '''
        return self.data.subtree_com[self.model.body_name2id(name)].copy()

    def body_pos(self, name):
        ''' Get the position of a named body in the simulator world reference frame '''
        return self.data.get_body_xpos(name).copy()

    def body_mat(self, name):
        ''' Get the rotation matrix of a named body in the simulator world reference frame '''
        return self.data.get_body_xmat(name).copy()

    def body_vel(self, name):
        ''' Get the velocity of a named body in the simulator world reference frame '''
        return self.data.get_body_xvelp(name).copy()
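
# A minimal sketch of how World might be exercised on its own (the config values are
# illustrative assumptions; in practice the Engine class constructs and fills them in):
world = World(config={'robot_base': 'xmls/car.xml',
                      'robot_xy': np.zeros(2),
                      'robot_rot': 0.0})
world.reset(build=True)      # generates the XML and instantiates world.sim
for _ in range(100):
    world.sim.step()
    world.render(mode='human')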
Example #21
model = load_model_from_xml(MODEL_LEG_XML)
sim = MjSim(model)
viewer = MjViewer(sim)

qpos = np.zeros(2)
qvel = np.zeros(2)
old_state = sim.get_state()
angHip = 120.0 / 180 * np.pi
angKne = (180.0 - 40.0) / 180 * np.pi
qpos[0] = angHip  # hip joint initial angle
qpos[1] = np.pi - angKne  # knee joint initial angle
qvel[0] = 0.0  # hip joint initial velocity
qvel[1] = 0.0  # knee joint initial velocity
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
                                 old_state.act, old_state.udd_state)
sim.set_state(new_state)
sim.forward()

# viewer.render()

# create muscle
timestep = 5e-4
# Series elastic element (SE) force-length relationship
eref = 0.04     # [lslack] tendon reference strain
# excitation-contraction coupling
preAct = 0.01     # [] preactivation
tau = 0.01    # [s] delay time constant
# contractile element (CE) force-length relationship
w = 0.56   # [lopt] width
c = 0.05   # []; remaining force at +/- width
# CE force-velocity relationship
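
# The constants above parameterize a Geyer-style muscle-tendon unit. As a sketch of
# where eref enters, the series-elastic (tendon) force-length relation commonly used
# in that model family is (an assumption, not code from the original snippet):
def f_se(l_se, l_slack, f_max, eref=0.04):
    """Tendon force from series-element length; zero when the tendon is slack."""
    strain = (l_se - l_slack) / l_slack
    return f_max * (strain / eref) ** 2 if strain > 0.0 else 0.0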
Example #22
class Environment():
    def __init__(self,
                 model_name,
                 goal_space_train,
                 goal_space_test,
                 project_state_to_end_goal,
                 end_goal_thresholds,
                 initial_state_space,
                 subgoal_bounds,
                 project_state_to_subgoal,
                 subgoal_thresholds,
                 max_actions=1200,
                 num_frames_skip=10,
                 show=False):

        self.name = model_name

        # Create Mujoco Simulation
        self.model = load_model_from_path("./mujoco_files/" + model_name)
        self.sim = MjSim(self.model)

        # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks
        if model_name == "pendulum.xml":
            self.state_dim = 2 * len(self.sim.data.qpos) + len(
                self.sim.data.qvel)
        else:
            self.state_dim = len(self.sim.data.qpos) + len(
                self.sim.data.qvel
            )  # State will include (i) joint angles and (ii) joint velocities
        self.action_dim = len(
            self.sim.model.actuator_ctrlrange)  # low-level action dim
        self.action_bounds = self.sim.model.actuator_ctrlrange[:,
                                                               1]  # low-level action bounds
        self.action_offset = np.zeros((len(
            self.action_bounds)))  # Assumes symmetric low-level action ranges
        self.end_goal_dim = len(goal_space_test)
        self.subgoal_dim = len(subgoal_bounds)
        self.subgoal_bounds = subgoal_bounds

        # Projection functions
        self.project_state_to_end_goal = project_state_to_end_goal
        self.project_state_to_subgoal = project_state_to_subgoal

        # Convert subgoal bounds to symmetric bounds and offset.  Need these to properly configure subgoal actor networks
        self.subgoal_bounds_symmetric = np.zeros((len(self.subgoal_bounds)))
        self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds)))

        for i in range(len(self.subgoal_bounds)):
            self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] -
                                                self.subgoal_bounds[i][0]) / 2
            self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][
                1] - self.subgoal_bounds_symmetric[i]

        # End goal/subgoal thresholds
        self.end_goal_thresholds = end_goal_thresholds
        self.subgoal_thresholds = subgoal_thresholds

        # Set initial state and goal state spaces
        self.initial_state_space = initial_state_space
        self.goal_space_train = goal_space_train
        self.goal_space_test = goal_space_test
        self.subgoal_colors = [
            "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon",
            "Gray", "White", "Black"
        ]

        self.max_actions = max_actions

        # Implement visualization if necessary
        self.visualize = show  # Visualization boolean
        if self.visualize:
            self.viewer = MjViewer(self.sim)
        self.num_frames_skip = num_frames_skip

        # The following variables are hardcoded for the tower building env:
        self.n_objects = 1  # try out 2-3 later
        self.min_tower_height = 1  # try out 2 later
        self.max_tower_height = 4  # try out 2-3 later

        self.initial_state = copy.deepcopy(self.sim.get_state())
        self.block_gripper = False
        self.n_substeps = 20
        self.gripper_extra_height = 0.0
        self.target_in_the_air = True  # maybe make dynamic?
        self.target_offset = 0.0
        self.obj_range = 0.15
        self.target_range = 0.15
        self.distance_threshold = 0.05
        self.initial_qpos = {
            'robot0:slide0': 0.0,
            'robot0:slide1': 0.0,
            'robot0:slide2': 0.0,
            'object0:joint': [0.1, 0.0, 0.05, 1., 0., 0., 0.],
            'object1:joint': [0.2, 0.0, 0.05, 1., 0., 0., 0.],
            'object2:joint': [0.3, 0.0, 0.05, 1., 0., 0., 0.],
            'object3:joint': [0.4, 0.0, 0.05, 1., 0., 0., 0.],
            'object4:joint': [0.5, 0.0, 0.05, 1., 0., 0., 0.],
        }
        self.reward_type = 'sparse'
        self.gripper_goal = 'gripper_none'  # can be 'gripper_none', 'gripper_random' and 'gripper_above'
        self.goal_size = (self.n_objects * 3)
        if self.gripper_goal != 'gripper_none':
            self.goal_size += 3
        gripperSkipIdx = 0 if self.gripper_goal == "gripper_none" else 3
        objectIdx = gripperSkipIdx + self.n_objects * 3

        self.table_height = 0.5
        self.obj_height = 0.05

        if self.name == "assets/fetch/build_tower.xml":
            self._env_setup(self.initial_qpos)
            self.state_dim = self._get_obs_len()
            self.action_dim = 4
            #self.action_bounds = np.array([[-1, 1], [-1, 1], [-1, 1], [-1, 1]])#self.sim.model.actuator_ctrlrange[:, 1]
            self.action_bounds = np.ones((self.action_dim))
            self.action_offset = np.zeros(
                (self.action_dim))  # Assumes symmetric low-level action ranges

            self.end_goal_dim = self.goal_size
            self.project_state_to_end_goal = self._obs2goal_tower
            self.end_goal_thresholds = [0.05 for i in range(self.goal_size)]

            self.use_full_state_space_as_subgoal_space = True

            if not self.use_full_state_space_as_subgoal_space:
                self.subgoal_dim = self.goal_size
                self.project_state_to_subgoal = self._obs2goal_tower
                self.subgoal_bounds_offset = np.concatenate(
                    [[1, 0.25, 0.55] for i in range(self.subgoal_dim // 3)])
                self.subgoal_bounds_symmetric = np.ones((self.subgoal_dim))
                self.subgoal_bounds = [([self.table_height, 1.5] if
                                        (i + 1) % 3 == 0 else [0, 1.5])
                                       for i in range(self.subgoal_dim)]
            else:

                #self.subgoal_dim = self.state_dim
                self.subgoal_dim = 3 + self.n_objects * 3
                self.project_state_to_subgoal = self._obs2goal_subgoal
                self.subgoal_bounds = [[0, 1.5]
                                       for _ in range(self.subgoal_dim)]
                #self.subgoal_bounds_offset = np.zeros((self.subgoal_dim))
                #objects_offset = np.concatenate([[1, 0.25, 0.55] for i in range(self.n_objects)])
                #self.subgoal_bounds_offset = np.zeros(self.subgoal_dim)
                #self.subgoal_bounds_offset[3:3+3*self.n_objects] = objects_offset
                #self.subgoal_bounds_offset = np.concatenate([[1, 0.25, 0.55] if gripperSkipIdx <= i <= objectIdx
                #                                             else [0, 0, 0] for i in range(0, self.subgoal_dim, 3)])

                #self.subgoal_bounds_symmetric = np.ones((self.subgoal_dim)) * 2
                #self.subgoal_bounds = [([self.table_height, 1.5] if (i+1) % 3 == 0 else [0, 1.5]) if
                #                       3 <= i < 3+3*self.n_objects else [-7, 7] for i in range(self.subgoal_dim)]
            # in ur5 subgoal bound: np.array([[-2 * np.pi, 2 * np.pi], [-2 * np.pi, 2 * np.pi], [-2 * np.pi, 2 * np.pi], [-4, 4], [-4, 4], [-4, 4]])

            # Try the following, borrowed from original code
            self.subgoal_bounds_symmetric = np.zeros(
                (len(self.subgoal_bounds)))
            self.subgoal_bounds_offset = np.zeros((len(self.subgoal_bounds)))

            for i in range(len(self.subgoal_bounds)):
                self.subgoal_bounds_symmetric[i] = (
                    self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2
                self.subgoal_bounds_offset[i] = self.subgoal_bounds[i][
                    1] - self.subgoal_bounds_symmetric[i]

            print("Subgoal offset:", self.subgoal_bounds_offset)
            print("Subgoal bounds:", self.subgoal_bounds)
            #pos_threshold = 0.05
            #angle_threshold = np.deg2rad(10)
            #velo_threshold = 2
            #objects_threshold = np.concatenate([[pos_threshold for _ in range(3)] for _ in range(self.n_objects * 2)])
            #objects_rotation = np.concatenate([[angle_threshold for _ in range(3)] for _ in range(self.n_objects)])
            #objects_velo_pos = np.concatenate([[velo_threshold for _ in range(3)] for _ in range(self.n_objects)])
            #self.subgoal_thresholds = np.concatenate((np.array([pos_threshold for i in range(3)]), objects_threshold,
            #                                     np.array([pos_threshold for i in range(2)]), objects_rotation,
            #                                     objects_velo_pos, objects_rotation * 4,
            #                                          np.array([velo_threshold for i in range(3)]),
            #                                          np.array([velo_threshold for i in range(2)])))
            self.subgoal_thresholds = [0.05 for i in range(self.subgoal_dim)]
            print("Subgoal thresholds: ", self.subgoal_thresholds)
            print("Shape thresholds:", np.shape(self.subgoal_thresholds))

            if __debug__:
                print("Action bounds: ", self.action_bounds)

    def _is_success(self, achieved_goal, desired_goal):
        d = goal_distance(achieved_goal, desired_goal)
        return (d < self.distance_threshold).astype(np.float32)

    def get_agent_params(self, agent_params):
        agent_params["subgoal_noise"] = [0.03 for i in range(self.subgoal_dim)]
        return agent_params

    def _obs2goal_tower(self, sim, obs):
        if self.gripper_goal != 'gripper_none':
            goal = obs[:self.goal_size]
        else:
            goal = obs[3:self.goal_size + 3]
        return goal

    def _obs2goal_subgoal(self, sim, obs):
        #return np.concatenate((np.array([bound_angle(sim.data.qpos[i]) for i in range(len(sim.data.qpos))]),
        #     np.array([4 if sim.data.qvel[i] > 4 else -4 if sim.data.qvel[i] < -4 else sim.data.qvel[i] for i in
        #               range(len(sim.data.qvel))])))

        #return obs

        return obs[:3 + self.n_objects * 3]

    # Get state for tower building env:
    def _get_obs(self):

        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
        # positions
        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt

        robot_qpos, robot_qvel = self.sim.data.qpos, self.sim.data.qvel
        object_pos, object_rot, object_velp, object_velr = ([]
                                                            for _ in range(4))
        object_rel_pos = []

        if self.n_objects > 0:
            for n_o in range(self.n_objects):
                oname = 'object{}'.format(n_o)
                this_object_pos = self.sim.data.get_site_xpos(oname)
                # rotations
                this_object_rot = rotations.mat2euler(
                    self.sim.data.get_site_xmat(oname))
                # velocities
                this_object_velp = self.sim.data.get_site_xvelp(oname) * dt
                this_object_velr = self.sim.data.get_site_xvelr(oname) * dt
                # gripper state
                this_object_rel_pos = this_object_pos - grip_pos
                this_object_velp -= grip_velp

                object_pos = np.concatenate([object_pos, this_object_pos])
                object_rot = np.concatenate([object_rot, this_object_rot])
                object_velp = np.concatenate([object_velp, this_object_velp])
                object_velr = np.concatenate([object_velr, this_object_velr])
                object_rel_pos = np.concatenate(
                    [object_rel_pos, this_object_rel_pos])
        else:
            object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.array(
                np.zeros(3))

        gripper_state = robot_qpos[-2:]
        gripper_vel = robot_qvel[
            -2:] * dt  # change to a scalar if the gripper is made symmetric

        obs = np.concatenate([
            grip_pos,
            object_pos.ravel(),
            object_rel_pos.ravel(),
            gripper_state,
            object_rot.ravel(),
            object_velp.ravel(),
            object_velr.ravel(),
            grip_velp,
            gripper_vel,
        ])
        return obs

    # Get state len:
    def _get_obs_len(self):

        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
        # positions
        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt

        robot_qpos, robot_qvel = self.sim.data.qpos, self.sim.data.qvel
        object_pos, object_rot, object_velp, object_velr = ([]
                                                            for _ in range(4))
        object_rel_pos = []

        if self.n_objects > 0:
            for n_o in range(self.n_objects):
                oname = 'object{}'.format(n_o)
                this_object_pos = self.sim.data.get_site_xpos(oname)
                # rotations
                this_object_rot = rotations.mat2euler(
                    self.sim.data.get_site_xmat(oname))
                # velocities
                this_object_velp = self.sim.data.get_site_xvelp(oname) * dt
                this_object_velr = self.sim.data.get_site_xvelr(oname) * dt
                # gripper state
                this_object_rel_pos = this_object_pos - grip_pos
                this_object_velp -= grip_velp

                object_pos = np.concatenate([object_pos, this_object_pos])
                object_rot = np.concatenate([object_rot, this_object_rot])
                object_velp = np.concatenate([object_velp, this_object_velp])
                object_velr = np.concatenate([object_velr, this_object_velr])
                object_rel_pos = np.concatenate(
                    [object_rel_pos, this_object_rel_pos])
        else:
            object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.array(
                np.zeros(3))

        gripper_state = robot_qpos[-2:]
        gripper_vel = robot_qvel[
            -2:] * dt  # change to a scalar if the gripper is made symmetric

        obs = np.concatenate([
            grip_pos,
            object_pos.ravel(),
            object_rel_pos.ravel(),
            gripper_state,
            object_rot.ravel(),
            object_velp.ravel(),
            object_velr.ravel(),
            grip_velp,
            gripper_vel,
        ])
        print("obs len: ", len(obs))
        return len(obs)

    # Get state, which concatenates joint positions and velocities
    def get_state(self):

        if self.name == "pendulum.xml":
            return np.concatenate([
                np.cos(self.sim.data.qpos),
                np.sin(self.sim.data.qpos), self.sim.data.qvel
            ])
        elif self.name == "assets/fetch/build_tower.xml":
            return self._get_obs()
        else:
            return np.concatenate((self.sim.data.qpos, self.sim.data.qvel))

    def _env_setup(self, initial_qpos):
        for name, value in initial_qpos.items():
            self.sim.data.set_joint_qpos(name, value)
        utils.reset_mocap_welds(self.sim)
        self.sim.forward()

        # Move end effector into position.
        gripper_target = np.array([
            -0.498, 0.005, -0.431 + self.gripper_extra_height
        ]) + self.sim.data.get_site_xpos('robot0:grip')

        gripper_rotation = np.array([1., 0., 1., 0.])
        self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
        self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
        for _ in range(10):
            self.sim.step()

        # Extract information for sampling goals.
        self.initial_gripper_xpos = self.sim.data.get_site_xpos(
            'robot0:grip').copy()
        print("inital gripper xpos", self.initial_gripper_xpos)
        if self.n_objects > 0:
            self.height_offset = self.sim.data.get_site_xpos('object0')[2]

    def _reset_sim_tower(self):
        self.step_ctr = 0
        self.sim.set_state(self.initial_state)

        # Randomize start position of objects.
        for o in range(self.n_objects):
            oname = 'object{}'.format(o)
            object_xpos = self.initial_gripper_xpos[:2]
            close = True
            while close:
                # self.initial_gripper_xpos[:2]
                object_xpos = [1, 0.25] + np.random.uniform(
                    -self.obj_range, self.obj_range, size=2)
                close = False
                dist_to_nearest = np.linalg.norm(object_xpos -
                                                 self.initial_gripper_xpos[:2])
                # Iterate through all previously placed boxes and select closest:
                for o_other in range(o):
                    other_xpos = self.sim.data.get_joint_qpos(
                        'object{}:joint'.format(o_other))[:2]
                    dist = np.linalg.norm(object_xpos - other_xpos)
                    dist_to_nearest = min(dist, dist_to_nearest)
                if dist_to_nearest < 0.1:
                    close = True

            object_qpos = self.sim.data.get_joint_qpos(
                '{}:joint'.format(oname))
            assert object_qpos.shape == (7, )
            object_qpos[:2] = object_xpos
            object_qpos[2] = self.table_height + (self.obj_height /
                                                  2) * self.obj_height * 1.05
            self.sim.data.set_joint_qpos('{}:joint'.format(oname), object_qpos)

        self.sim.forward()
        return True

    # Reset simulation to state within initial state specified by user
    def reset_sim(self):

        if self.name == "assets/fetch/build_tower.xml":
            self._reset_sim_tower()
        else:

            # Reset joint positions and velocities
            for i in range(len(self.sim.data.qpos)):
                self.sim.data.qpos[i] = np.random.uniform(
                    self.initial_state_space[i][0],
                    self.initial_state_space[i][1])

            for i in range(len(self.sim.data.qvel)):
                self.sim.data.qvel[i] = np.random.uniform(
                    self.initial_state_space[len(self.sim.data.qpos) + i][0],
                    self.initial_state_space[len(self.sim.data.qpos) + i][1])

            self.sim.step()

        # Return state
        return self.get_state()

    # Set action for tower building env:
    def _set_action(self, action):
        assert action.shape == (4, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
        self.step_ctr += 1

    # Execute low-level action for number of frames specified by num_frames_skip
    def execute_action(self, action):
        if __debug__:
            print("action: ", action)
        if self.name == "assets/fetch/build_tower.xml":
            self._set_action(action)
            #self.sim.data.ctrl[:] = action
        else:
            self.sim.data.ctrl[:] = action
        for _ in range(self.num_frames_skip):
            self.sim.step()
            if self.visualize:
                self.viewer.render()

        return self.get_state()

    # Visualize end goal.  This function may need to be adjusted for new environments.
    def display_end_goal(self, end_goal):
        # Goal can be visualized by changing the location of the relevant site object.
        if self.name == "pendulum.xml":
            self.sim.data.mocap_pos[0] = np.array([
                0.5 * np.sin(end_goal[0]), 0, 0.5 * np.cos(end_goal[0]) + 0.6
            ])
        elif self.name == "assets/fetch/build_tower.xml":
            for i in range(self.n_objects):
                self.model.body_pos[-13 + i] = end_goal[i * 3:(i * 3) + 3]
        elif self.name == "ur5.xml":

            theta_1 = end_goal[0]
            theta_2 = end_goal[1]
            theta_3 = end_goal[2]

            # shoulder_pos_1 = np.array([0,0,0,1])
            upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
            forearm_pos_3 = np.array([0.425, 0, 0, 1])
            wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

            # Transformation matrix from shoulder to base reference frame
            T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159],
                              [0, 0, 0, 1]])

            # Transformation matrix from upper arm to shoulder reference frame
            T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                              [np.sin(theta_1),
                               np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                              [0, 0, 0, 1]])

            # Transformation matrix from forearm to upper arm reference frame
            T_3_2 = np.array([[np.cos(theta_2), 0,
                               np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                              [-np.sin(theta_2), 0,
                               np.cos(theta_2), 0], [0, 0, 0, 1]])

            # Transformation matrix from wrist 1 to forearm reference frame
            T_4_3 = np.array([[np.cos(theta_3), 0,
                               np.sin(theta_3), 0.425], [0, 1, 0, 0],
                              [-np.sin(theta_3), 0,
                               np.cos(theta_3), 0], [0, 0, 0, 1]])

            # Determine joint position relative to original reference frame
            # shoulder_pos = T_1_0.dot(shoulder_pos_1)
            upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
            forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3]
            wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                wrist_1_pos_4)[:3]

            joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
            """
            print("\nEnd Goal Joint Pos: ")
            print("Upper Arm Pos: ", joint_pos[0])
            print("Forearm Pos: ", joint_pos[1])
            print("Wrist Pos: ", joint_pos[2])
            """

            for i in range(3):
                self.sim.data.mocap_pos[i] = joint_pos[i]

        else:
            assert False, "Provide display end goal function in environment.py file"

    def _sample_goal_tower(self):
        obs = self._get_obs()
        target_goal = None
        if obs is not None:
            # obs is already a plain numpy array here (not a dict), so it is used
            # directly rather than as obs['observation']
            if self.gripper_goal != 'gripper_none':
                goal = obs.copy()[:self.goal_size]
            else:
                goal = obs.copy()[3:(self.goal_size + 3)]

            if self.gripper_goal != 'gripper_none' and self.n_objects > 0:
                target_goal_start_idx = 3
            else:
                target_goal_start_idx = 0

            stack_tower = (self.max_tower_height - self.min_tower_height +
                           1) == self.n_objects

            if not stack_tower:
                if self.n_objects > 0:
                    target_range = self.n_objects
                else:
                    target_range = 1
                for n_o in range(target_range):
                    # too_close = True
                    while True:
                        # self.initial_gripper_xpos[:3]
                        target_goal = [
                            1, 0.25, self.table_height + 0.05
                        ] + np.random.uniform(
                            -self.target_range, self.target_range, size=3)
                        target_goal += self.target_offset
                        rnd_height = random.randint(self.min_tower_height,
                                                    self.max_tower_height)
                        self.goal_tower_height = rnd_height
                        target_goal[2] = self.table_height + (
                            rnd_height * self.obj_height) - (self.obj_height /
                                                             2)
                        too_close = False
                        for i in range(0, target_goal_start_idx, 3):
                            other_loc = goal[i:i + 3]
                            dist = np.linalg.norm(other_loc[:2] -
                                                  target_goal[:2],
                                                  axis=-1)
                            if dist < 0.1:
                                too_close = True
                        if too_close is False:
                            break

                    goal[target_goal_start_idx:target_goal_start_idx +
                         3] = target_goal.copy()
                    target_goal_start_idx += 3
            else:
                target_goal_xy = [1, 0.25] + np.random.uniform(
                    -self.target_range, self.target_range, size=2)
                self.goal_tower_height = self.n_objects
                for n_o in range(self.n_objects):
                    height = n_o + 1
                    target_z = self.table_height + (
                        height * self.obj_height) - (self.obj_height / 2)
                    target_goal = np.concatenate((target_goal_xy, [target_z]))
                    goal[target_goal_start_idx:target_goal_start_idx +
                         3] = target_goal.copy()
                    target_goal_start_idx += 3

            # Final gripper position
            if self.gripper_goal != 'gripper_none':
                gripper_goal_pos = goal.copy()[-3:]
                if self.gripper_goal == 'gripper_above':
                    gripper_goal_pos[2] += (3 * self.obj_height)
                elif self.gripper_goal == 'gripper_random':
                    too_close = True
                    while too_close:
                        gripper_goal_pos = [1, 0.25, self.table_height + 0.05] + \
                                           np.random.uniform(-self.target_range,
                                                                  self.target_range, size=3)
                        # gripper_goal_pos[0] += 0.25
                        gripper_goal_pos[2] += 0.14
                        if np.linalg.norm(gripper_goal_pos - target_goal,
                                          axis=-1) >= 0.1:
                            too_close = False
                goal[:3] = gripper_goal_pos
            return goal.copy()
        else:
            return []

    # Function returns an end goal
    def get_next_goal(self, test):

        end_goal = np.zeros((len(self.goal_space_test)))

        if self.name == "ur5.xml":

            goal_possible = False
            while not goal_possible:
                end_goal = np.zeros(shape=(self.end_goal_dim, ))
                end_goal[0] = np.random.uniform(self.goal_space_test[0][0],
                                                self.goal_space_test[0][1])

                end_goal[1] = np.random.uniform(self.goal_space_test[1][0],
                                                self.goal_space_test[1][1])
                end_goal[2] = np.random.uniform(self.goal_space_test[2][0],
                                                self.goal_space_test[2][1])

                # Next need to ensure chosen joint angles result in achievable task (i.e., desired end effector position is above ground)

                theta_1 = end_goal[0]
                theta_2 = end_goal[1]
                theta_3 = end_goal[2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])

                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1),
                                   np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                                  [0, 0, 0, 1]])

                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0,
                                   np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0,
                                   np.cos(theta_2), 0], [0, 0, 0, 1]])

                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0,
                                   np.sin(theta_3), 0.425], [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0,
                                   np.cos(theta_3), 0], [0, 0, 0, 1]])

                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(
                    forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                    wrist_1_pos_4)[:3]

                # Make sure wrist 1 pos is above ground so can actually be reached
                if np.absolute(end_goal[0]) > np.pi / 4 and forearm_pos[
                        2] > 0.05 and wrist_1_pos[2] > 0.15:
                    goal_possible = True
        elif self.name == "assets/fetch/build_tower.xml":
            end_goal = self._sample_goal_tower()

        elif not test and self.goal_space_train is not None:
            for i in range(len(self.goal_space_train)):
                end_goal[i] = np.random.uniform(self.goal_space_train[i][0],
                                                self.goal_space_train[i][1])
        else:
            assert self.goal_space_test is not None, "Need goal space for testing. Set goal_space_test variable in \"design_env.py\" file"

            for i in range(len(self.goal_space_test)):
                end_goal[i] = np.random.uniform(self.goal_space_test[i][0],
                                                self.goal_space_test[i][1])

        # Visualize End Goal
        self.display_end_goal(end_goal)

        return end_goal

    # Visualize all subgoals
    def display_subgoals(self, subgoals):

        # Display up to 10 subgoals and end goal
        if len(subgoals) <= 11:
            subgoal_ind = 0
        else:
            subgoal_ind = len(subgoals) - 11

        for i in range(1, min(len(subgoals), 11)):
            if self.name == "pendulum.xml":
                self.sim.data.mocap_pos[i] = np.array([
                    0.5 * np.sin(subgoals[subgoal_ind][0]), 0,
                    0.5 * np.cos(subgoals[subgoal_ind][0]) + 0.6
                ])
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1
            elif self.name == "assets/fetch/build_tower.xml":
                # display only the goal positions of the blocks
                if self.use_full_state_space_as_subgoal_space:
                    #for n in range(self.n_objects):
                    #    self.model.body_pos[-9 + (3 * (i-1)) + n] = self._obs2goal_tower(self.sim, subgoals[i-1])[n*3:(n*3)+3]

                    # Gripper pos:
                    self.model.body_pos[-1] = subgoals[i - 1][0:3]
                    # Objects pos:
                    for n in range(self.n_objects):
                        self.model.body_pos[-10 + (3 * (i - 1)) +
                                            n] = subgoals[i -
                                                          1][3 +
                                                             n * 3:(n * 3) + 6]
                else:
                    for n in range(self.n_objects):
                        self.model.body_pos[-9 + (3 * (i - 1)) +
                                            n] = subgoals[i - 1][n *
                                                                 3:(n * 3) + 3]
                subgoal_ind += 1
            elif self.name == "ur5.xml":

                theta_1 = subgoals[subgoal_ind][0]
                theta_2 = subgoals[subgoal_ind][1]
                theta_3 = subgoals[subgoal_ind][2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])

                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1),
                                   np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                                  [0, 0, 0, 1]])

                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0,
                                   np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0,
                                   np.cos(theta_2), 0], [0, 0, 0, 1]])

                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0,
                                   np.sin(theta_3), 0.425], [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0,
                                   np.cos(theta_3), 0], [0, 0, 0, 1]])

                # Determine joint position relative to original reference frame
                # shoulder_pos = T_1_0.dot(shoulder_pos_1)
                upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(
                    forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                    wrist_1_pos_4)[:3]

                joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
                """
                print("\nSubgoal %d Joint Pos: " % i)
                print("Upper Arm Pos: ", joint_pos[0])
                print("Forearm Pos: ", joint_pos[1])
                print("Wrist Pos: ", joint_pos[2])
                """

                # Designate site position for upper arm, forearm and wrist
                for j in range(3):
                    self.sim.data.mocap_pos[3 + 3 * (i - 1) + j] = np.copy(
                        joint_pos[j])
                    self.sim.model.site_rgba[3 + 3 * (i - 1) + j][3] = 1

                # print("\nLayer %d Predicted Pos: " % i, wrist_1_pos[:3])

                subgoal_ind += 1
            else:
                # Visualize desired gripper position, which is elements 18-21 in subgoal vector
                self.sim.data.mocap_pos[i] = subgoals[subgoal_ind]
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1
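
The 4x4 homogeneous-transform chain above appears twice in this class (in get_next_goal and in display_subgoals). A minimal standalone sketch of the same forward-kinematics computation, using the UR5 offsets hard-coded in those methods (0.089159, 0.13585, 0.425, (0.39225, -0.1197)); the helper name is hypothetical:

import numpy as np


def ur5_joint_positions(theta_1, theta_2, theta_3):
    """Sketch of the transform chain above: joint positions in the base frame."""

    def rot_z(t, offset):
        T = np.eye(4)
        T[:2, :2] = [[np.cos(t), -np.sin(t)], [np.sin(t), np.cos(t)]]
        T[:3, 3] = offset
        return T

    def rot_y(t, offset):
        T = np.eye(4)
        T[0, 0], T[0, 2] = np.cos(t), np.sin(t)
        T[2, 0], T[2, 2] = -np.sin(t), np.cos(t)
        T[:3, 3] = offset
        return T

    T_1_0 = np.eye(4)
    T_1_0[2, 3] = 0.089159                      # base -> shoulder (pure z offset)
    T_2_1 = rot_z(theta_1, [0, 0, 0])           # shoulder pan
    T_3_2 = rot_y(theta_2, [0, 0.13585, 0])     # shoulder lift
    T_4_3 = rot_y(theta_3, [0.425, 0, 0])       # elbow

    upper_arm = T_1_0.dot(T_2_1).dot([0, 0.13585, 0, 1])[:3]
    forearm = T_1_0.dot(T_2_1).dot(T_3_2).dot([0.425, 0, 0, 1])[:3]
    wrist_1 = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot([0.39225, -0.1197, 0, 1])[:3]
    return upper_arm, forearm, wrist_1
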
Пример #23
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=640, height=480, mode='offscreen',
                                             camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
Пример #24
0
def main(data_dir):
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    initial_state = MjSimState(time=0,
                               qpos=np.array([
                                   0, -np.pi / 4, 0, -3 * np.pi / 4 + 1, 0,
                                   np.pi / 2, np.pi / 4
                               ]),
                               qvel=np.zeros(7),
                               act=None,
                               udd_state={})
    sim.set_state(initial_state)
    sim.step()
    traj_count = 0
    control_state = None
    while (traj_count < NUM_TRAJECTORIES and control_state != FINISHED):
        control_state = ACCELERATING
        outputFile = None
        initial_q = sim.get_state()[q_INDEX]
        velGen = randVelGen.RandVelGenerator()
        qd_des = velGen.generatePatternVel()
        coming_back_time = 0.0
        time = 0
        while control_state != FINISHED:
            state = sim.get_state()
            q = state[q_INDEX]
            qd = state[qd_INDEX]
            boundViolations = bounds.getBoundViolations(q)
            # RD =
            TB = bounds.tableBoundViolation(sim)
            OB = bounds.outOfBounds(q)
            DA = helperFun.moving(qd)
            DC = helperFun.stopped(qd)
            DD = DC
            DB = coming_back_time > RETURN_TIME
            FN = traj_count >= NUM_TRAJECTORIES

            prev_state = control_state
            # transition block
            if control_state == ACCELERATING:
                if not TB and not OB and not DA:
                    control_state = ACCELERATING
                elif TB:
                    control_state = COMING_BACK_IN_BOUNDS
                    coming_back_time = 0.0
                elif not TB and OB:
                    control_state = DAMPING
                    curBoundViolations = bounds.getBoundViolations(q)
                    velGen.setJointDirections(curBoundViolations)
                elif not TB and not OB and DA:
                    control_state = COASTING
                    traj_count += 1
                    outputFile = open(data_dir +
                                      helperFun.getUniqueFileName("traj"),
                                      mode='x')
                    outputWriter = csv.writer(outputFile, delimiter=',')
                    print_count(traj_count)
                else:
                    control_state = FINISHED
                    print("Unknown transistion! ACCELERATING")

            elif control_state == COASTING:
                if not FN and not TB and not OB and DC:
                    control_state = ACCELERATING
                    qd_des = velGen.generatePatternVel()
                    outputFile.close()
                elif not FN and TB:
                    control_state = COMING_BACK_IN_BOUNDS
                    coming_back_time = 0
                    outputFile.close()
                elif not FN and not TB and OB:
                    control_state = DAMPING
                    outputFile.close()
                    curBoundViolations = bounds.getBoundViolations(q)
                    velGen.setJointDirections(curBoundViolations)
                elif FN:
                    control_state = FINISHED
                    outputFile.close()
                elif not FN and not TB and not OB and not DC:
                    control_state = COASTING
                else:
                    control_state = FINISHED
                    print("Unknown transition! COASTING")
                    outputFile.close()

            elif control_state == DAMPING:
                if not TB and not DD:
                    control_state = DAMPING
                elif TB:
                    control_state = COMING_BACK_IN_BOUNDS
                    coming_back_time = 0.0
                elif not TB and DD:
                    control_state = ACCELERATING
                    qd_des = velGen.generatePatternVel()
                else:
                    control_state = FINISHED
                    print("Unknow transition! DAMPING")

            elif control_state == COMING_BACK_IN_BOUNDS:
                if not DB:
                    control_state = COMING_BACK_IN_BOUNDS
                elif DB and OB:
                    control_state = DAMPING
                    curBoundViolations = bounds.getBoundViolations(q)
                    velGen.setJointDirections(curBoundViolations)
                elif DB and not OB:
                    control_state = ACCELERATING
                    qd_des = velGen.generatePatternVel()
                else:
                    control_state = FINISHED
                    print("Unknown transition! COMING_BACK_IN_BOUNDS")

            elif control_state == FINISHED:
                control_state = FINISHED

            else:
                control_state = FINISHED
                print("Got to an invalid state!")

            # debug states
            if prev_state != control_state:
                if control_state == ACCELERATING:
                    print("ACCELERATING")
                elif control_state == COASTING:
                    print("COASTING")
                elif control_state == DAMPING:
                    print("DAMPING")
                elif control_state == COMING_BACK_IN_BOUNDS:
                    print("COMING_BACK_IN_BOUNDS")
                elif control_state == "FINISHED":
                    print("FINISHED")
                else:
                    print("In a bad state!")

            torques = np.zeros(7)
            if control_state == ACCELERATING:
                torques = controllers.basicVelControl(qd_des=qd_des, qd_cur=qd)

            elif control_state == COASTING:
                data = np.concatenate(
                    (q, qd, data_calc.get_3D_data(sim), [time]))
                outputWriter.writerow(data)
                torques = controllers.basicVelControl(qd_des=qd_des, qd_cur=qd)

            elif control_state == DAMPING:
                torques = controllers.dampingControl(qd)

            elif control_state == COMING_BACK_IN_BOUNDS:
                coming_back_time += TIMESTEP
                torques = controllers.moveAwayFromTable(q=q, qd=qd)

            elif control_state == FINISHED:
                outputFile.close()
                break
            else:
                print("Got to an invalid state!")
                control_state = FINISHED
                break

            sim.data.ctrl[:] = torques
            sim.step()
            viewer.render()
            time += TIMESTEP
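
The controllers, bounds, randVelGen, helperFun and data_calc modules used above are external to this snippet. As one example, a rough sketch of what the two torque controllers might look like (proportional velocity tracking and pure joint damping); the gains are made-up placeholders, not the real project's values:

import numpy as np

VEL_GAIN = 5.0    # hypothetical proportional gain
DAMP_GAIN = 2.0   # hypothetical damping gain


def basicVelControl(qd_des, qd_cur):
    """Torque proportional to the joint-velocity error."""
    return VEL_GAIN * (np.asarray(qd_des) - np.asarray(qd_cur))


def dampingControl(qd):
    """Torque that opposes the current joint velocity."""
    return -DAMP_GAIN * np.asarray(qd)
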
Пример #25
0
def test_sim_state():
    model = load_model_from_xml(BASIC_MODEL_XML)

    foo = 10
    d = {"foo": foo,
         "foo_array": np.array([foo, foo, foo]),
         "foo_2darray": np.reshape(np.array([foo, foo, foo, foo]), (2, 2)),
         }

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)

    state = sim.get_state()
    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state.udd_state.keys():
        if (isinstance(state.udd_state[k], Number)):
            assert state.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state.udd_state[k], sim.udd_state[k])

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == (1 + sim.model.nq + sim.model.nv + sim.model.na + 8)

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    for k in state2.udd_state.keys():
        if (isinstance(state2.udd_state[k], Number)):
            assert state2.udd_state[k] == sim.udd_state[k]
        else:
            assert np.array_equal(state2.udd_state[k], sim.udd_state[k])

    assert state2 == state
    assert not state2 != state

    # test equality with deleting keys
    state2 = state2._replace(udd_state={"foo": foo})
    assert state2 != state
    assert not (state2 == state)

    # test equality with changing contents of array
    state2 = state2._replace(
        udd_state={"foo": foo, "foo_array": np.array([foo, foo + 1])})
    assert state2 != state
    assert not (state2 == state)

    # test equality with adding keys
    d2 = dict(d)
    d2.update({"not_foo": foo})
    state2 = state2._replace(udd_state=d2)
    assert state2 != state
    assert not (state2 == state)

    # test defensive copy
    sim.set_state(state)
    state.qpos[0] = -1
    assert not np.array_equal(state.qpos, sim.data.qpos)

    state3 = sim.get_state()
    state3.qpos[0] = -1
    assert not np.array_equal(state3.qpos, sim.data.qpos)
    state3.udd_state["foo_array"][0] = -1
    assert not np.array_equal(
        state3.udd_state["foo_array"], sim.udd_state["foo_array"])

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    state = sim.get_state()
    print("state.udd_state = %s" % state.udd_state)

    assert state.udd_state == {}

    # test flatten, unflatten
    a = state.flatten()

    assert len(a) == 1 + sim.model.nq + sim.model.nv + sim.model.na

    state2 = MjSimState.from_flattened(a, sim)

    assert np.array_equal(state.time, sim.data.time)
    assert np.array_equal(state.qpos, sim.data.qpos)
    assert np.array_equal(state.qvel, sim.data.qvel)
    assert np.array_equal(state.act, sim.data.act)
    assert state.udd_state == sim.udd_state
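
The +8 in the flatten-length assertion is just the total number of scalars in the udd_state dictionary: foo (1), foo_array (3) and foo_2darray (2x2 = 4). A quick standalone check of that arithmetic:

import numpy as np

d = {"foo": 10,
     "foo_array": np.array([10, 10, 10]),
     "foo_2darray": np.reshape(np.array([10, 10, 10, 10]), (2, 2))}

udd_scalars = sum(np.asarray(v).size for v in d.values())
assert udd_scalars == 1 + 3 + 4 == 8
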
Пример #27
0
def closeloop_run_fcn(model,
                      desired_kinematics,
                      P,
                      I,
                      delay_timesteps=0,
                      model_ver=0,
                      plot_outputs=True,
                      Mj_render=False,
                      timestep=.005):
    est_activations = estimate_activations_fcn(model, desired_kinematics)
    number_of_task_samples = desired_kinematics.shape[0]
    chassis_pos = np.zeros(number_of_task_samples, )
    input_kinematics = np.zeros(desired_kinematics.shape)
    real_attempt_positions = np.zeros([number_of_task_samples, 2])
    real_attempt_activations = np.zeros([number_of_task_samples, 3])
    q_error_cum = np.zeros([number_of_task_samples, 2])  # sample error history

    Mj_model = load_model_from_path(
        "./models/nmi_leg_w_chassis_v{}.xml".format(model_ver))
    sim = MjSim(Mj_model)

    if Mj_render:
        viewer = MjViewer(sim)
        viewer.cam.fixedcamid += 1
        viewer.cam.type = const.CAMERA_FIXED
    sim_state = sim.get_state()
    control_vector_length = sim.data.ctrl.__len__()
    print("control_vector_length: " + str(control_vector_length))

    sim.set_state(sim_state)
    gradient_edge_order = 1
    for ii in range(number_of_task_samples):
        if ii < max(gradient_edge_order, delay_timesteps + 1):
            #print(ii)
            input_kinematics[ii, :] = desired_kinematics[ii, :]
        else:
            [input_kinematics[ii, :],
             q_error_cum] = calculate_closeloop_inputkinematics(
                 step_number=ii,
                 real_attempt_positions=real_attempt_positions,
                 desired_kinematics=desired_kinematics,
                 q_error_cum=q_error_cum,
                 P=P,
                 I=I,
                 delay_timesteps=delay_timesteps,
                 gradient_edge_order=gradient_edge_order,
                 timestep=timestep)
        est_activations[ii, :] = model.predict([input_kinematics[ii, :]])[0, :]
        sim.data.ctrl[:] = est_activations[ii, :]
        sim.step()
        chassis_pos[ii] = sim.data.get_geom_xpos("Chassis_frame")[0]
        current_positions_array = sim.data.qpos[-2:]
        real_attempt_positions[ii, :] = current_positions_array
        real_attempt_activations[ii, :] = sim.data.ctrl
        if Mj_render:
            viewer.render()
    real_attempt_kinematics = positions_to_kinematics_fcn(
        real_attempt_positions[:, 0],
        real_attempt_positions[:, 1],
        timestep=timestep)
    error0 = error_cal_fcn(desired_kinematics[:, 0],
                           real_attempt_kinematics[:, 0])
    error1 = error_cal_fcn(desired_kinematics[:, 3],
                           real_attempt_kinematics[:, 3])
    average_error = 0.5 * (error0 + error1)
    if plot_outputs:
        #plt.figure()
        alpha = .8
        plot_t = np.linspace(timestep, desired_kinematics.shape[0] * timestep,
                             desired_kinematics.shape[0])
        plt.subplot(2, 1, 1)
        plt.plot(plot_t,
                 desired_kinematics[:, 0],
                 'k',
                 plot_t,
                 real_attempt_kinematics[:, 0],
                 'C1',
                 alpha=.9)
        plt.ylabel("$q_1$ (rads)")
        plt.subplot(2, 1, 2)
        plt.plot(plot_t,
                 desired_kinematics[:, 3],
                 'k',
                 plot_t,
                 real_attempt_kinematics[:, 3],
                 'C1',
                 alpha=.9)
        plt.ylabel("$q_2$  (rads)")
        plt.xlabel("time (s)")
        plt.show(block=True)
    return average_error, real_attempt_kinematics, real_attempt_activations
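
calculate_closeloop_inputkinematics, estimate_activations_fcn, positions_to_kinematics_fcn and error_cal_fcn are defined elsewhere. As a hedged guess at the closed-loop part, a sketch of a PI correction that shifts the commanded position channels (columns 0 and 3, matching the error calculation above) by P*error + I*integral; the details are assumptions, not the project's real implementation:

import numpy as np


def calculate_closeloop_inputkinematics_sketch(step_number, real_attempt_positions,
                                               desired_kinematics, q_error_cum,
                                               P, I, delay_timesteps=0,
                                               gradient_edge_order=1, timestep=.005):
    """Hedged sketch: PI correction of the commanded joint positions."""
    idx = step_number - delay_timesteps - 1
    q_des = desired_kinematics[step_number, [0, 3]]
    q_err = q_des - real_attempt_positions[idx, :]
    q_error_cum[step_number, :] = q_err * timestep
    integral = q_error_cum[:step_number + 1, :].sum(axis=0)

    corrected = desired_kinematics[step_number, :].copy()
    corrected[[0, 3]] = q_des + P * q_err + I * integral
    return corrected, q_error_cum
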
Пример #28
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self,
                 env_name,
                 rand_seed,
                 maximum_length,
                 model_path,
                 frame_skip,
                 misc_info={}):
        self._env_name = env_name
        self._rand_seed = rand_seed
        self._maximum_length = maximum_length
        self._misc_info = misc_info

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()

        # Why is this here? Causes errors
        #observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        #assert not done
        #self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        # The _step-based probe above is disabled, so self.obs_dim is never set;
        # fall back to the qpos/qvel size (matches state_vector()). Subclasses with
        # different observations should override this.
        self.obs_dim = self.init_qpos.size + self.init_qvel.size
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed(self._rand_seed)

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(self._rand_seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def get_env_state(self):
        """
        Get full state of the environment beyond qpos and qvel
        For example, if targets are defined using sites, this function should also
        contain location of the sites (which are not included in qpos).
        Must return a dictionary that can be used in the set_env_state function
        """
        raise NotImplementedError

    def set_env_state(self, state):
        """
        Uses the state dictionary to set the state of the world
        """
        raise NotImplementedError

    # -----------------------------

    def reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob, 0, False, {}

    def reset_soft(self, seed=None):
        return self._old_obs, 0, False, {}

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, ) and qvel.shape == (
            self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self,
                         policy,
                         horizon=1000,
                         num_episodes=1,
                         mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(
                    o)[0] if mode == 'exploration' else policy.get_action(
                        o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self,
                                   policy,
                                   horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640, 480),
                                   mode='exploration',
                                   save_loc='./tmp/',
                                   filename='newvid',
                                   it=0,
                                   camera_name=None):

        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o, *_ = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy(o)
                o, r, d, _ = self.step(a)
                t = t + 1
                curr_frame = self.sim.render(width=frame_size[0],
                                             height=frame_size[1],
                                             mode='offscreen',
                                             camera_name=camera_name,
                                             device_id=0)
                arrs.append(curr_frame[::-1, :, :])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + str(it) + ".mp4"

            if not os.path.exists(save_loc):
                os.makedirs(save_loc)

            imageio.mimwrite(file_name, np.asarray(arrs), fps=10.0)
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
Пример #29
0
def initial_config_from_mjcf(file, ee_list, verbose=False):
    """
    Parameters
    ----------
    file (str):
        mjcf file
    ee_list (list of str):
        End-effector body names

    Returns
    -------
    ret (dict):
        nq (tf.Tensor):
            Number of joints
        nu (tf.Tensor):
            Number of inputs
        S (tf.Tensor):
            Selection Matrix. This includes actuator gear.
            (1,nu, nq)
        dt (tf.Tensor):
            Timestep
        joint_armature (tf.Tensor):
            (nq,)
        g (tf.Tensor): Gravity
        Mlist (tf.Tensor):
            Link frame i relative to p(i) at the home position
            (nbody,6,6)
        Slist (tf.Tensor):
            Screw axes Si of the joints in a space frame.
            (nq,6)
        init_ee_SE3 (dict of tf.Tensor):
            Initial end-effector SE3
            (4,4)
        Blist (dict of tf.Tensor):
            Joint screw axes in the end-effector frame when the
            manipulator is at the home position
            (nq_branch,6)
        Bidlist (dict of tf.Tensor):
            List of the joint indices related to the end-effector branch
            (nq_branch,)
        Glist (tf.Tensor):
            Spatial inertia matrices Gi of the links.
            (nbody,6,6)
        pidlist (tf.Tensor):
            Parent body index.
            (nbody,)

    """
    ret = dict()
    model = load_model_from_path(file)
    sim = MjSim(model)
    sim_state = sim.get_state()
    for i in range(sim.model.nq):
        sim_state.qpos[i] = 0.
        sim_state.qvel[i] = 0.
    sim.set_state(sim_state)
    sim.forward()

    nq, nv, nu = sim.model.nq, sim.model.nv, sim.model.nu
    n_ee = len(ee_list)
    njoint, nbody = sim.model.njnt, sim.model.nbody-n_ee-1

    assert sim.model.nq + 1 + n_ee == sim.model.nbody
    assert nq == nv
    assert nq == njoint
    assert nq == nbody

    S = None
    if nu != 0:
        S = np.zeros((1,nu,nq))
        S[0,0:nu, nq-nu:nq] = np.diag(sim.model.actuator_gear[:,0])

    muj_world_id = 0
    muj_ee_id, muj_body_id, muj_body_name = [], [], []
    muj_global_body_pos, muj_global_body_SO3 = np.zeros((sim.model.nbody, 3)), np.zeros((sim.model.nbody, 3, 3))
    muj_global_joint_pos, muj_global_joint_SO3 = np.zeros((sim.model.nq, 3)), np.zeros((sim.model.nq, 3, 3))
    muj_global_body_SO3[0] = np.eye(3)
    muj_global_inertial_pos, muj_global_inertial_SO3 = np.zeros((sim.model.nbody, 3)), np.zeros((sim.model.nbody, 3, 3))
    for i in range(sim.model.nbody):
        if sim.model.body_names[i] == "world":
            muj_world_id = i
        elif sim.model.body_names[i] in ee_list:
            muj_ee_id.append(sim.model.body_name2id(sim.model.body_names[i]))
        else:
            muj_body_id.append(i)
            muj_body_name.append(sim.model.body_names[i])
        muj_global_body_SO3[i] = sim.data.get_body_xmat(sim.model.body_names[i])
        muj_global_body_pos[i] = sim.data.get_body_xpos(sim.model.body_names[i])
        muj_global_inertial_pos[i] = muj_global_body_pos[i] + sim.model.body_ipos[i]
        muj_global_inertial_SO3[i] = np.dot(muj_global_body_SO3[i], quat_to_SO3(sim.model.body_iquat[i]))
    for i in range(sim.model.nq):
        body_id = sim.model.jnt_bodyid[i]
        muj_global_joint_SO3[i] = muj_global_body_SO3[body_id]
        muj_global_joint_pos[i] = muj_global_body_pos[body_id] + np.dot(muj_global_body_SO3[body_id], sim.model.jnt_pos[i])
    Bidlist = dict()
    for ee in ee_list:
        idlist_reverse = []
        ee_parent = sim.model.body_parentid[sim.model.body_name2id(ee)]
        while True:
            j_st_id = sim.model.body_jntadr[ee_parent]
            nj = sim.model.body_jntnum[ee_parent]
            idlist_reverse.extend([*range(j_st_id, j_st_id+nj)][::-1])
            ee_parent = sim.model.body_parentid[ee_parent]
            if (sim.model.body_names[ee_parent]=='world'):
                break
        Bidlist[ee] = idlist_reverse[::-1]

    if verbose:
        print("="*80)
        print("Infos aboue Mujoco Model")
        print("-"*80)
        print("Global Body Pos")
        print(muj_global_body_pos)
        print("-"*80)
        print("Global Body Ori")
        print(muj_global_body_SO3)
        print("-"*80)
        print("Global Inertia Pos")
        print(muj_global_inertial_pos)
        print("-"*80)
        print("Global Inertia Ori")
        print(muj_global_inertial_SO3)
        print("-"*80)
        print("Global Joint Pos")
        print(muj_global_joint_pos)
        print("-"*80)
        print("Global Joint Ori")
        print(muj_global_joint_SO3)
        for ee in ee_list:
            print("-"*80)
            print("{} Related Joint Id".format(ee))
            print(Bidlist[ee])
        print("="*80)

    ret['nq'] = tf.convert_to_tensor(nq, tf.int32)
    ret['nu'] = tf.convert_to_tensor(nu, tf.int32)
    if S is None:
        ret['S'] = None
    else:
        ret['S'] = tf.convert_to_tensor(S, tf.float32)
    ret['dt'] = tf.convert_to_tensor(sim.model.opt.timestep, tf.float32)
    ret['joint_armature'] = tf.convert_to_tensor(sim.model.dof_armature, tf.float32)
    ret['g'] = tf.convert_to_tensor(sim.model.opt.gravity, tf.float32)
    pidlist = []
    for i in range(nbody):
        pname = sim.model.body_names[sim.model.body_parentid[sim.model.body_name2id(muj_body_name[i])]]
        if pname == "world":
            pidlist.append(-1)
        else:
            pidlist.append(muj_body_name.index(pname))
    ret['pidlist'] = tf.convert_to_tensor(np.array(pidlist), tf.int32)
    ret['Mlist'] = [None]*nbody
    ret['Glist'] = [None]*nbody
    for i in range(nbody):
        muj_id = muj_body_id[i]
        muj_pid = sim.model.body_parentid[muj_id]
        rel_pos = np.dot(muj_global_inertial_SO3[muj_pid].T, muj_global_inertial_pos[muj_id] - muj_global_inertial_pos[muj_pid])
        rel_SO3 = np.dot(muj_global_inertial_SO3[muj_pid].T, muj_global_inertial_SO3[muj_id])
        rel_SE3 = tf.squeeze(Rp_to_SE3(tf.expand_dims(tf.convert_to_tensor(rel_SO3,tf.float32),0), tf.expand_dims(tf.convert_to_tensor(rel_pos,tf.float32),0)), 0)
        ret['Mlist'][i] = rel_SE3

        G = np.diag(sim.model.body_inertia[muj_id])
        mI = sim.model.body_mass[muj_id] * np.eye(3)
        Gi = np.zeros((6,6))
        Gi[0:3,0:3] = G
        Gi[3:6,3:6] = mI
        ret['Glist'][i] = tf.convert_to_tensor(Gi, tf.float32)
    ret['Mlist'] = tf.stack(ret['Mlist'],0)
    ret['Glist'] = tf.stack(ret['Glist'],0)

    ret['Slist'] = [None]*nq
    for i in range(nq):
        R = muj_global_joint_SO3[i]
        p = muj_global_joint_pos[i]
        Tsj = Rp_to_SE3(tf.expand_dims(tf.convert_to_tensor(R,tf.float32),0), tf.expand_dims(tf.convert_to_tensor(p,tf.float32),0))
        adTsj = tf.squeeze(adjoint(Tsj),0)
        if sim.model.jnt_type[i] == 2:
            screw_axis = np.concatenate([np.zeros(3), sim.model.jnt_axis[i]], axis=0)
        elif sim.model.jnt_type[i] == 3:
            screw_axis = np.concatenate([sim.model.jnt_axis[i], np.zeros(3)], axis=0)
        else:
            raise ValueError("Wrong Joint Type")
        screw_axis = tf.expand_dims(tf.convert_to_tensor(screw_axis, tf.float32),axis=1)
        S = tf.squeeze(tf.matmul(adTsj, screw_axis),1)
        ret['Slist'][i] = S
    ret['Slist'] = tf.stack(ret['Slist'],0)

    ret['init_ee_SE3'] = dict()
    ret['Bidlist'] = dict()
    ret['Blist'] = dict()
    for ee in ee_list:
        p = tf.expand_dims(tf.convert_to_tensor(muj_global_body_pos[sim.model.body_name2id(ee)], tf.float32),0)
        R = tf.expand_dims(tf.convert_to_tensor(muj_global_body_SO3[sim.model.body_name2id(ee)], tf.float32),0)
        ret['init_ee_SE3'][ee] = tf.squeeze(Rp_to_SE3(R,p),0)
        ret['Bidlist'][ee] = [None]*len(Bidlist[ee])
        ret['Blist'][ee] = [None]*len(Bidlist[ee])
        for i, id in enumerate(Bidlist[ee]):
            ret['Bidlist'][ee][i] = id
            R = muj_global_joint_SO3[id]
            p = muj_global_joint_pos[id]
            Tsj = tf.squeeze(Rp_to_SE3(tf.expand_dims(tf.convert_to_tensor(R,tf.float32),0), tf.expand_dims(tf.convert_to_tensor(p,tf.float32),0)),0)
            Tsb = ret['init_ee_SE3'][ee]
            Tbs = tf.squeeze(SE3_inv(tf.expand_dims(Tsb,0)))
            Tbj = tf.matmul(Tbs,Tsj)
            adTbj = tf.squeeze(adjoint(tf.expand_dims(Tbj,0)),0)
            if sim.model.jnt_type[id] == 2:
                screw_axis = np.concatenate([np.zeros(3), sim.model.jnt_axis[id]], axis=0)
            elif sim.model.jnt_type[id] == 3:
                screw_axis = np.concatenate([sim.model.jnt_axis[id], np.zeros(3)], axis=0)
            else:
                raise ValueError("Wrong Joint Type")
            screw_axis = tf.expand_dims(tf.convert_to_tensor(screw_axis, tf.float32),axis=1)
            B = tf.squeeze(tf.matmul(adTbj, screw_axis),1)
            ret['Blist'][ee][i] = B
        ret['Bidlist'][ee] = tf.stack(ret['Bidlist'][ee],0)
        ret['Blist'][ee] = tf.stack(ret['Blist'][ee],0)

    if verbose:
        print("="*80)
        print("Infos about Return Value")
        pretty_print_dictionary(ret)
        print("="*80)

    return ret
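
A usage sketch; the MJCF path and end-effector body name below are hypothetical placeholders:

# Hypothetical model file and end-effector body name, purely for illustration.
ret = initial_config_from_mjcf("models/two_link_arm.xml",
                               ee_list=["end_effector"],
                               verbose=True)
print(ret['nq'], ret['Slist'].shape)   # joint count and the (nq, 6) screw-axis list
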
Пример #30
0
class GripperTester:
    """
    A class that is used to test gripper

    Args:
        gripper (GripperModel): A gripper instance to be tested
        pos (str): (x y z) position to place the gripper in string form, e.g. '0 0 0.3'
        quat (str): rotation to apply to gripper in string form, e.g. '0 0 1 0' to flip z axis
        gripper_low_pos (float): controls the gripper's low position along the gripper_z slide joint, larger -> higher
        gripper_high_pos (float): controls the gripper's high position along the gripper_z slide joint, larger -> higher;
            must be larger than gripper_low_pos
        box_size (None or 3-tuple of int): the size of the box to grasp, None defaults to [0.02, 0.02, 0.02]
        box_density (int): the density of the box to grasp
        step_time (int): the interval between two gripper actions
        render (bool): if True, show rendering
    """

    def __init__(
        self,
        gripper,
        pos,
        quat,
        gripper_low_pos,
        gripper_high_pos,
        box_size=None,
        box_density=10000,
        step_time=400,
        render=True,
    ):
        # define viewer
        self.viewer = None

        world = MujocoWorldBase()
        # Add a table
        arena = TableArena(table_full_size=(0.4, 0.4, 0.1), table_offset=(0, 0, 0.1), has_legs=False)
        world.merge(arena)

        # Add a gripper
        self.gripper = gripper
        # Create another body with a slider joint to which we'll add this gripper
        gripper_body = ET.Element("body")
        gripper_body.set("pos", pos)
        gripper_body.set("quat", quat)  # flip z
        gripper_body.append(new_joint(name="gripper_z_joint", type="slide", axis="0 0 -1", damping="50"))
        # Add all gripper bodies to this higher level body
        for body in gripper.worldbody:
            gripper_body.append(body)
        # Merge the all of the gripper tags except its bodies
        world.merge(gripper, merge_body=None)
        # Manually add the higher level body we created
        world.worldbody.append(gripper_body)
        # Create a new actuator to control our slider joint
        world.actuator.append(new_actuator(joint="gripper_z_joint", act_type="position", name="gripper_z", kp="500"))

        # Add an object for grasping
        # density is in units kg / m3
        TABLE_TOP = [0, 0, 0.09]
        if box_size is None:
            box_size = [0.02, 0.02, 0.02]
        box_size = np.array(box_size)
        self.cube = BoxObject(
            name="object", size=box_size, rgba=[1, 0, 0, 1], friction=[1, 0.005, 0.0001], density=box_density
        )
        object_pos = np.array(TABLE_TOP + box_size * [0, 0, 1])
        mujoco_object = self.cube.get_obj()
        # Set the position of this object
        mujoco_object.set("pos", array_to_string(object_pos))
        # Add our object to the world body
        world.worldbody.append(mujoco_object)

        # add reference objects for x and y axes
        x_ref = BoxObject(
            name="x_ref", size=[0.01, 0.01, 0.01], rgba=[0, 1, 0, 1], obj_type="visual", joints=None
        ).get_obj()
        x_ref.set("pos", "0.2 0 0.105")
        world.worldbody.append(x_ref)
        y_ref = BoxObject(
            name="y_ref", size=[0.01, 0.01, 0.01], rgba=[0, 0, 1, 1], obj_type="visual", joints=None
        ).get_obj()
        y_ref.set("pos", "0 0.2 0.105")
        world.worldbody.append(y_ref)

        self.world = world
        self.render = render
        self.simulation_ready = False
        self.step_time = step_time
        self.cur_step = 0
        if gripper_low_pos > gripper_high_pos:
            raise ValueError(
                "gripper_low_pos {} is larger " "than gripper_high_pos {}".format(gripper_low_pos, gripper_high_pos)
            )
        self.gripper_low_pos = gripper_low_pos
        self.gripper_high_pos = gripper_high_pos

    def start_simulation(self):
        """
        Starts simulation of the test world
        """
        model = self.world.get_model(mode="mujoco_py")

        self.sim = MjSim(model)
        if self.render:
            self.viewer = MjViewer(self.sim)
        self.sim_state = self.sim.get_state()

        # For gravity correction
        gravity_corrected = ["gripper_z_joint"]
        self._gravity_corrected_qvels = [self.sim.model.get_joint_qvel_addr(x) for x in gravity_corrected]

        self.gripper_z_id = self.sim.model.actuator_name2id("gripper_z")
        self.gripper_z_is_low = False

        self.gripper_actuator_ids = [self.sim.model.actuator_name2id(x) for x in self.gripper.actuators]

        self.gripper_is_closed = True

        self.object_id = self.sim.model.body_name2id(self.cube.root_body)
        object_default_pos = self.sim.data.body_xpos[self.object_id]
        self.object_default_pos = np.array(object_default_pos, copy=True)

        self.reset()
        self.simulation_ready = True

    def reset(self):
        """
        Resets the simulation to the initial state
        """
        self.sim.set_state(self.sim_state)
        self.cur_step = 0

    def close(self):
        """
        Close the viewer if it exists
        """
        if self.viewer is not None:
            self.viewer.close()

    def step(self):
        """
        Forward the simulation by one timestep

        Raises:
            RuntimeError: if start_simulation is not yet called.
        """
        if not self.simulation_ready:
            raise RuntimeError("Call start_simulation before calling step")
        if self.gripper_z_is_low:
            self.sim.data.ctrl[self.gripper_z_id] = self.gripper_low_pos
        else:
            self.sim.data.ctrl[self.gripper_z_id] = self.gripper_high_pos
        if self.gripper_is_closed:
            self._apply_gripper_action(1)
        else:
            self._apply_gripper_action(-1)
        self._apply_gravity_compensation()
        self.sim.step()
        if self.render:
            self.viewer.render()
        self.cur_step += 1

    def _apply_gripper_action(self, action):
        """
        Applies binary gripper action

        Args:
            action (int): Action to apply. Should be -1 (open) or 1 (closed)
        """
        gripper_action_actual = self.gripper.format_action(np.array([action]))
        # rescale normalized gripper action to control ranges
        ctrl_range = self.sim.model.actuator_ctrlrange[self.gripper_actuator_ids]
        bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0])
        weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0])
        applied_gripper_action = bias + weight * gripper_action_actual
        self.sim.data.ctrl[self.gripper_actuator_ids] = applied_gripper_action

    def _apply_gravity_compensation(self):
        """
        Applies gravity compensation to the simulation
        """
        self.sim.data.qfrc_applied[self._gravity_corrected_qvels] = self.sim.data.qfrc_bias[
            self._gravity_corrected_qvels
        ]

    def loop(self, total_iters=1, test_y=False, y_baseline=0.01):
        """
        Performs lower, grip, raise and release actions of a gripper,
        each separated by step_time timesteps

        Args:
            total_iters (int): Iterations to perform before exiting
            test_y (bool): test if object is lifted
            y_baseline (float): threshold for determining that object is lifted
        """
        seq = [(False, False), (True, False), (True, True), (False, True)]
        for cur_iter in range(total_iters):
            for cur_plan in seq:
                self.gripper_z_is_low, self.gripper_is_closed = cur_plan
                for step in range(self.step_time):
                    self.step()
            if test_y:
                if not self.object_height > y_baseline:
                    raise ValueError(
                        "object is lifed by {}, ".format(self.object_height)
                        + "not reaching the requirement {}".format(y_baseline)
                    )

    @property
    def object_height(self):
        """
        Queries the height (z) of the object relative to its default position on the ground

        Returns:
            float: Object height relative to default (ground) object position
        """
        return self.sim.data.body_xpos[self.object_id][2] - self.object_default_pos[2]
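
The rescaling in _apply_gripper_action maps a normalized action in [-1, 1] onto each actuator's ctrl range through its midpoint (bias) and half-width (weight). A small standalone check with made-up ranges:

import numpy as np

ctrl_range = np.array([[0.0, 0.04],     # made-up actuator ctrl ranges
                       [-0.02, 0.02]])
bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0])
weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0])

assert np.allclose(bias + weight * -1.0, ctrl_range[:, 0])   # action -1 hits the lower limit
assert np.allclose(bias + weight * +1.0, ctrl_range[:, 1])   # action +1 hits the upper limit
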
Пример #31
0
class PushPuckBase(ABC):
    def __init__(self,
                 n_substeps: int = 1,
                 render: bool = False):
        self.render = render

        set_puck(raw_xml_path=self.raw_xml_path, xml_path=self.xml_path, puck_size=None, puck_pos=None)
        model = load_model_from_path(self.xml_path)

        self.sim = MjSim(model=model, nsubsteps=n_substeps)
        self.viewer = MjViewer(self.sim) if render else None

        self.reset()

    def __call__(self, weights, extra_timesteps=1000):
        self.reset()
        return self.rollout(weights, extra_timesteps)

    def reset(self) -> None:
        """Resets the environment (including the agent) to the initial conditions.
        """
        self.sim.reset()
        # Set initial position and velocity
        self.qpos = self.sim.data.qpos.copy()
        self.qpos[:self.robot_init_qpos.shape[0]] = self.robot_init_qpos
        qvel = np.zeros(self.sim.data.qvel.shape)
        mjSimState = MjSimState(time=0.0, qpos=self.qpos, qvel=qvel, act=None, udd_state={})
        self.sim.set_state(mjSimState)
        self.sim.forward()

    @property
    def xml_path(self) -> str:
        return str(Path(__file__).resolve().parents[0]) + '/' + 'assets/xml_model/env_model.xml'

    def set_target(self, target_pos) -> None:
        """
        Sets the postion of the target.
        :param target_pos: Desired target position
        """
        if target_pos is None:
            target_pos = [0.7, 0, 0.02]

        body_id = self.sim.model.body_name2id('target')
        self.sim.model.body_pos[body_id] = target_pos
        self.sim.forward()

    @property
    def target_pos(self):
        """
        Helper for getting the current target position.
        :return: Current target position
        """
        return self.sim.data.get_site_xpos('target:site1').copy()

    @property
    def puck_pos(self):
        """
        Helper for getting the current puck position.
        :return: Current puck position
        """
        return self.sim.data.get_body_xpos('puck').copy()

    @property
    def tcp_pos(self):
        """
        Helper for getting the current end effector position.
        :return: Current end effector position
        """
        return self.sim.data.get_body_xpos('tcp').copy()

    @property
    @abstractmethod
    def raw_xml_path(self):
        raise NotImplementedError

    @property
    @abstractmethod
    def robot_init_qpos(self):
        raise NotImplementedError

    @property
    @abstractmethod
    def robot_final_qpos(self):
        raise NotImplementedError

    @abstractmethod
    def rollout(self, weights, goal_pos, extra_timesteps=200):
        raise NotImplementedError
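
A minimal concrete subclass sketch, only to show which members a subclass has to provide; the xml path, the 7-DoF home pose and the zero-control rollout are hypothetical placeholders (note that __call__ above invokes rollout(weights, extra_timesteps), so the sketch follows that signature):

import numpy as np


class PushPuckMinimal(PushPuckBase):
    """Hypothetical concrete variant of PushPuckBase."""

    @property
    def raw_xml_path(self) -> str:
        return 'assets/xml_model/env_model_raw.xml'   # made-up path

    @property
    def robot_init_qpos(self) -> np.ndarray:
        return np.zeros(7)                            # made-up home configuration

    @property
    def robot_final_qpos(self) -> np.ndarray:
        return np.zeros(7)

    def rollout(self, weights, extra_timesteps=1000):
        # Placeholder rollout: ignore the weights, apply zero control and just step.
        for _ in range(extra_timesteps):
            self.sim.data.ctrl[:] = 0.0
            self.sim.step()
            if self.viewer is not None:
                self.viewer.render()
        return self.puck_pos, self.target_pos
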
Пример #32
0
    print("t: %5.3f" % time)
    print("qpos: ", qpos)
    print("qvel: ", qvel)
    print("tosser (slide,hinge): ", qpos[:2])
    print("object (z,y,pitch): ", qpos[-3:])


model = load_model_from_path("../../xmls/tosser.xml")
sim = MjSim(model)

viewer = MjViewer(sim)

sim_state = sim.get_state()

while True:
    sim.set_state(sim_state)

    for i in range(1000):
        state = sim.get_state()
        # time, qpos, qvel, act, udd_state = state.time, state.qpos, state.qvel, state.act, state.udd_state
        # print(time, qpos, qvel)
        print_state(state)
        if i < 150:
            sim.data.ctrl[0] = -0.0
            sim.data.ctrl[1] = -0.0
        else:
            sim.data.ctrl[0] = -1.0
            sim.data.ctrl[1] = -1.0
        sim.step()
        viewer.render()
Пример #33
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    def evaluate_success(self, paths, logger=None):
        """
        Log various success metrics calculated based on input paths into the logger
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except:
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640,480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1],
                                             mode='offscreen', camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))