Exemplo n.º 1
0
class WindySlope(gym.Env):
    def __init__(self,
                 model,
                 mode,
                 hertz=25,
                 should_render=True,
                 should_screenshot=False):
        self.hertz = hertz
        self.steps = 0
        self.should_render = should_render
        self.should_screenshot = should_screenshot
        self.nsubsteps = int(MAX_TIME / model.opt.timestep / 100)
        self.viewer = None
        self.model = model
        self.mode = mode
        self.enabled = True
        self.metadata = {'render.modes': 'rgb_array'}
        self.should_record = False

    def close(self):
        pass

    @property
    def observation_space(self):
        return Box(low=-np.inf, high=np.inf, shape=(18, ))

    @property
    def action_space(self):
        return Box(low=-np.inf, high=np.inf, shape=(0, ))

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, model):
        self._model = model
        self.sim = MjSim(model)
        self.data = self.sim.data

        if self.should_render:
            if self.viewer:
                self.viewer.sim = sim
                return
            self.viewer = MjViewer(self.sim)
            self.viewer.cam.azimuth = 45
            self.viewer.cam.elevation = -20
            self.viewer.cam.distance = 25
            self.viewer.cam.lookat[:] = [0, 0, -2]
            self.viewer.scn.flags[3] = 0

    def reset(self):
        self.sim.reset()
        self.steps = 0

        self.sim.forward()

        obs = self.get_observations(self.model, self.data)
        return obs

    def get_observations(self, model, data):
        self.sim.forward()
        obs = []
        name = 'box'
        pos = data.get_body_xpos(name)
        xmat = data.get_body_xmat(name).reshape(-1)
        velp = data.get_body_xvelp(name)
        velr = data.get_body_xvelr(name)

        for x in [pos, xmat, velp, velr]:
            obs.extend(x.copy())

        obs = np.array(obs, dtype=np.float32)
        return obs

    def screenshot(self, image_path):
        self.viewer.hide_overlay = True
        self.viewer.render()
        width, height = 2560, 1440
        #width, height = 1,1
        img = self.viewer.read_pixels(width, height, depth=False)
        # original image is upside-down, so flip it
        img = img[::-1, :, :]
        imageio.imwrite(image_path, img)

    def step(self, action):
        nsubsteps = self.nsubsteps
        for _ in range(nsubsteps):
            self.sim.step()
            self.render()
        self.steps += 1

        return self.get_observations(self.model,
                                     self.data), 1, self.steps == 100, {}

    def render(self, mode=None):
        if self.should_render:
            self.viewer._overlay.clear()

            def nothing():
                return

            self.viewer._create_full_overlay = nothing
            wind = model.opt.wind[0]
            self.viewer.add_overlay(1, "Wind", "{:.2f}".format(wind))
            self.viewer.render()
            if self.should_record:
                width, height = 2560, 1440
                img = self.viewer.read_pixels(width, height, depth=False)
                # original image is upside-down, so flip it
                img = img[::-1, :, :]
                return img

    def euler2quat(self, euler):
        euler = np.asarray(euler, dtype=np.float64)
        assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)

        ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2
        si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
        ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
        cc, cs = ci * ck, ci * sk
        sc, ss = si * ck, si * sk

        quat = np.empty(euler.shape[:-1] + (4, ), dtype=np.float64)
        quat[..., 0] = cj * cc + sj * ss
        quat[..., 3] = cj * sc - sj * cs
        quat[..., 2] = -(cj * ss + sj * cc)
        quat[..., 1] = cj * cs - sj * sc
        return quat

    def degrees2radians(self, degrees):
        return degrees * np.pi / 180
Exemplo n.º 2
0
class RobotEnv(object):
    def __init__(self):
        self.model = load_model_from_path(XML_PATH)
        self.sim = MjSim(self.model)
        self.viewer = MjViewer(self.sim)

        # finger instances
        self.front_finger = RollerFinger(idx=0,
                                         name='front',
                                         pos_base=np.array([0, 0.048, 0.085]),
                                         base_normal=np.array([0., -1., 0.]),
                                         base_horizontal=np.array([1., 0.,
                                                                   0.]))
        self.left_finger = RollerFinger(
            idx=1,
            name='left',
            pos_base=np.array([-0.04157, -0.024, 0.085]),
            base_normal=np.array([0.8660, 0.5, 0.]),
            base_horizontal=np.array([-0.5, 0.8660, 0.]))
        self.right_finger = RollerFinger(
            idx=2,
            name='right',
            pos_base=np.array([0.04157, -0.024, 0.085]),
            base_normal=np.array([-0.8660, 0.5, 0.]),
            base_horizontal=np.array([-0.5, -0.8660, 0.]))

        self.r_fingers = [
            self.front_finger, self.left_finger, self.right_finger
        ]

        # object target orientation in angle-axis representation
        self.axis = normalize_vec(np.array([0., 1., 1.]))
        self.angle = deg_to_rad(90)

        self.init_box_pos = np.array([0.0, 0.0, 0.2])  # obj initial pos
        self.target_box_pos = np.array([0.0, 0.0, 0.2])  # obj target pos

        self.max_step = 1500
        self.termination = False
        self.success = False
        self.timestep = 0

        self.k_vw = 0.5
        self.k_vv = 0.3

        self.reset()

    def reset(self, target_axis=np.array([0., 1., 1.]), target_angle=90):

        self.sim.reset()

        # Reset finger positions
        for fg in self.r_fingers:
            self.sim.data.qpos[fg.idx * 3] = fg.init_base_angle  # base angles
            self.sim.data.qpos[fg.idx * 3 + 1] = 0  # pivot angles

        my_map = [0, 3, 6, 1, 4, 7, 2, 5,
                  8]  # qpos and ctrl have different joint orders (see xml)
        for ii in range(9):
            self.sim.data.ctrl[ii] = self.sim.data.qpos[my_map[ii]]
        self.sim.step()

        self.termination = False
        self.success = False
        self.timestep = 0

        # reset target
        if target_axis is not None:
            self.axis = normalize_vec(target_axis)
        if target_angle is not None:
            self.angle = deg_to_rad(target_angle)
        self.quat_target = np.array([
            np.cos(self.angle / 2), self.axis[0] * np.sin(self.angle / 2),
            self.axis[1] * np.sin(self.angle / 2),
            self.axis[2] * np.sin(self.angle / 2)
        ])
        self.curr = self.sim.data.sensordata[
            -7:]  # quat_to_rot(self.sim.data.sensordata[-4:])
        self.rot_matrix_target = R.from_quat(quat_to_quat(
            self.quat_target)).as_matrix()
        self.test_min_err = 100

        self.session_name = str(self.axis) + ', ' + str(rad_to_deg(self.angle))
        return self.sim.data.sensordata

    def get_expert_action(self):
        # Sensor data
        curr_data = self.sim.data.sensordata
        self.cube_pos = curr_data[-7:-4]  # obj position
        self.cube_orientation = curr_data[-4:]  # obj orientation

        # compute each finger
        for fg in self.r_fingers:
            fg.base_rad = self.sim.data.qpos[fg.idx * 3]
            fg.q_pivot_prev = self.sim.data.qpos[fg.idx * 3 + 1]
            fg.q_pivot, fg.dq_roller = compute_pivot_and_roller(
                k_vw=self.k_vw,
                k_vv=self.k_vv,
                base_angle=fg.base_rad,
                pos_obj_curr=self.cube_pos,
                pos_obj_target=self.target_box_pos,
                ori_obj_curr=self.cube_orientation,
                ori_obj_target=self.quat_target,
                r_roller=fg.roller_radius,
                finger_length=fg.dist_pivot_to_base,
                pos_base=fg.pos_base,
                base_axis=fg.base_horizontal,
                finger_normal=fg.base_normal)

            dq_pivot = np.clip(fg.q_pivot - fg.q_pivot_prev, -fg.q_pivot_limit,
                               fg.q_pivot_limit)
            fg.q_pivot = fg.q_pivot_prev + dq_pivot

            self.sim.data.ctrl[fg.idx] = -fg.init_base_angle
            self.sim.data.ctrl[3 + fg.idx] = fg.pivot_gear_ratio * fg.q_pivot
            self.sim.data.ctrl[6 + fg.idx] += fg.dq_roller

        action = np.copy(self.sim.data.ctrl)
        return action

    def step(self, action):
        for i in range(len(action)):
            self.sim.data.ctrl[i] = action[i]

        self.viewer.add_overlay(const.GRID_TOPRIGHT, " ", self.session_name)
        self.viewer.render()
        self.sim.step()
        self.timestep += 1

        obs = self.sim.data.sensordata
        curr = obs[-7:]  # object pose and orientation

        err_curr_rot = get_quat_error(curr[3:], self.quat_target)
        err_curr_pos = get_pos_error(curr[:3], self.target_box_pos)
        err_curr = SCALE_ERROR_ROT * err_curr_rot + SCALE_ERROR_POS * err_curr_pos
        self.test_min_err = min(self.test_min_err, err_curr_rot)
        if err_curr < 15:
            self.termination = True
            self.success = True
        else:
            if self.timestep > self.max_step or err_curr_pos > 0.05:
                self.termination = True
                self.success = False
        #if self.termination:
        #    print("target axis",self.axis,"success",self.success,"min_error",self.test_min_err)
        return obs, self.termination, self.success