示例#1
0
    def _get_obs(self):
        # positions
        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
        robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
        if self.has_object:
            object_pos = self.sim.data.get_site_xpos('object0')
            # rotations
            object_rot = rotations.mat2euler(
                self.sim.data.get_site_xmat('object0'))
            # velocities
            object_velp = self.sim.data.get_site_xvelp('object0') * dt
            object_velr = self.sim.data.get_site_xvelr('object0') * dt
            # gripper state
            object_rel_pos = object_pos - grip_pos
            object_velp -= grip_velp
        else:
            object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(
                0)
        gripper_state = robot_qpos[-2:]
        gripper_vel = robot_qvel[
            -2:] * dt  # change to a scalar if the gripper is made symmetric

        if not self.has_object:
            achieved_goal = grip_pos.copy()
        else:
            achieved_goal = np.squeeze(object_pos.copy())
        obs = np.concatenate([
            grip_pos,
            object_pos.ravel(),
            object_rel_pos.ravel(),
            gripper_state,
            object_rot.ravel(),
            object_velp.ravel(),
            object_velr.ravel(),
            grip_velp,
            gripper_vel,
        ])

        return {
            'observation': obs.copy(),
            'achieved_goal': achieved_goal.copy(),
            'desired_goal': self.goal.copy(),
        }
    def _get_obs(self):

        obj_grp = self.max_n_objects if self.ai_object else 0

        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
        robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
        gripper_state = robot_qpos[-2:]
        gripper_vel = robot_qvel[
            -2:] * dt  # change to a scalar if the gripper is made symmetric

        obs_all = []

        object_pos, object_rot, object_velp, object_velr, object_rel_pos = [], [], [], [], []
        for i_object in range(obj_grp, obj_grp + self.n_objects):
            # observations for the robot
            object_pos.append(
                self.sim.data.get_site_xpos('object' + str(i_object)))
            # rotations
            object_rot.append(
                rotations.mat2euler(
                    self.sim.data.get_site_xmat('object' + str(i_object))))
            # velocities
            object_velp.append(
                self.sim.data.get_site_xvelp('object' + str(i_object)) * dt -
                grip_velp)
            object_velr.append(
                self.sim.data.get_site_xvelr('object' + str(i_object)) * dt)
            # gripper state
            object_rel_pos.append(
                self.sim.data.get_site_xpos('object' + str(i_object)) -
                grip_pos)

        object_pos = np.asarray(object_pos)
        object_rot = np.asarray(object_rot)
        object_velp = np.asarray(object_velp)
        object_velr = np.asarray(object_velr)
        object_rel_pos = np.asarray(object_rel_pos)

        obs = np.concatenate([
            grip_pos,
            object_pos.ravel(),
            object_rel_pos.ravel(), gripper_state,
            object_rot.ravel(),
            object_velp.ravel(),
            object_velr.ravel(), grip_velp, gripper_vel
        ])
        if self.observe_obj_grp:
            obs = np.concatenate([obs, np.asarray([obj_grp])])

        obs_all.append(obs.copy())

        target_pos = self.goal.copy().reshape(self.n_objects, -1)
        target_rel_pos, target_velp = [], []
        for i_object in range(obj_grp, obj_grp + self.n_objects):
            # observations for the object
            # object state wrt target
            target_rel_pos.append(target_pos[i_object % self.max_n_objects] -
                                  self.sim.data.get_site_xpos('object' +
                                                              str(i_object)))
            target_velp.append(
                0 -
                self.sim.data.get_site_xvelp('object' + str(i_object)) * dt)

        target_rel_pos = np.asarray(target_rel_pos)
        target_velp = np.asarray(target_velp)

        obs = np.concatenate([
            np.zeros_like(grip_pos),
            object_pos.ravel(),
            target_rel_pos.ravel(),
            np.zeros_like(gripper_state),
            object_rot.ravel(),
            target_velp.ravel(),
            object_velr.ravel(),
            np.zeros_like(grip_velp),
            np.zeros_like(gripper_vel)
        ])
        if self.observe_obj_grp:
            obs = np.concatenate([obs, np.asarray([self.max_n_objects])])

        obs_all.append(obs.copy())

        obs = np.asarray(obs_all)
        achieved_goal = object_pos.copy().ravel()

        return {
            'observation': obs.copy(),
            'achieved_goal': achieved_goal.copy(),
            'desired_goal': self.goal.copy(),
        }