示例#1
0
    def __init__(self, max_steps: int, example_config: bool):
        """
        Constructor

        :param max_steps: maximum number of simulation steps
        :param example_config: configuration for the 'illustrative example' in the journal
        """
        Serializable._init(self, locals())

        super().__init__(dt=None, max_steps=max_steps)

        self.example_config = example_config
        self._planet = -1

        # Initialize the domain parameters (Earth)
        self._g = 9.81  # gravity constant [m/s**2]
        self._k = 2e3  # catapult spring's stiffness constant [N/m]
        self._x = 1.  # catapult spring's pre-elongation [m]

        # Domain independent parameter
        self._m = 70.  # victim's mass [kg]

        # Set the bounds for the system's states adn actions
        max_state = np.array([1000.])  # [m], arbitrary but >> self._x
        max_act = max_state
        self._curr_act = np.zeros_like(max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state, labels=['$h$'])
        self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=['$h_0$'])
        self._act_space = BoxSpace(-max_act, max_act, labels=[r'$\theta$'])

        # Define the task including the reward function
        self._task = self._create_task(task_args=dict())
示例#2
0
    def __init__(self):
        """ Constructor """
        Serializable._init(self, locals())

        # Initialize basic variables
        super().__init__(dt=None, max_steps=1)

        # Set the bounds for the system's states adn actions
        max_state = np.array([100., 100.])
        max_act = max_state
        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=['x_1', 'x_2'])
        self._init_space = SingularStateSpace(np.zeros(
            self._state_space.shape),
                                              labels=['x_1_init', 'x_2_init'])
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=['x_1_next', 'x_2_next'])

        # Define the task including the reward function
        self._task = self._create_task()

        # Animation with pyplot
        self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])
示例#3
0
def eval_domain_params(
        pool: SamplerPool,
        env: SimEnv,
        policy: Policy,
        params: List[Dict],
        init_state: Optional[np.ndarray] = None) -> List[StepSequence]:
    """
    Evaluate a policy on a multidimensional grid of domain parameters.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param policy: policy to evaluate
    :param params: multidimensional grid of domain parameters
    :param init_state: initial state of the environment which will be fixed if not set to `None`
    :return: list of rollouts
    """
    # Strip all domain randomization wrappers from the environment
    env = remove_all_dr_wrappers(env, verbose=True)
    if init_state is not None:
        env.init_space = SingularStateSpace(fixed_state=init_state)

    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # Run with progress bar
    with tqdm(leave=False, file=sys.stdout, unit="rollouts",
              desc="Sampling") as pb:
        return pool.run_map(
            functools.partial(_ps_run_one_domain_param, eval=True), params, pb)
示例#4
0
    def _create_spaces(self):
        # Initial state space
        # Set the actual stable initial position. This position would be reached after some time using the internal
        # PD controller to stabilize at self.qpos_des_init
        if self.num_dof == 7:
            self.init_qpos[:7] = np.array(
                [0., 0.65, 0., 1.41, 0., -0.28, -1.57])
            self.init_qpos[
                7] = -0.21  # angle of the first rope segment relative to the cup bottom plate
            init_ball_pos = np.array([0.828, 0., 1.131])
            init_cup_goal = np.array([0.82521, 0, 1.4469])
        elif self.num_dof == 4:
            self.init_qpos[:4] = np.array([0., 0.63, 0., 1.27])
            self.init_qpos[
                4] = -0.34  # angle of the first rope segment relative to the cup bottom plate
            init_ball_pos = np.array([0.723, 0., 1.168])
            init_cup_goal = np.array([0.758, 0, 1.5])
        # The initial position of the ball in cartesian coordinates
        init_state = np.concatenate(
            [self.init_qpos, self.init_qvel, init_ball_pos, init_cup_goal])
        if self.fixed_init_state:
            self._init_space = SingularStateSpace(init_state)
        else:
            # Add plus/minus one degree to each motor joint and the first rope segment joint
            init_state_up = init_state.copy()
            init_state_up[:self.num_dof] += np.pi / 180 * np.array(
                [0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof]
            init_state_lo = init_state.copy()
            init_state_lo[:self.num_dof] -= np.pi / 180 * np.array(
                [0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof]
            self._init_space = BoxSpace(init_state_lo, init_state_up)

        # State space
        state_shape = init_state.shape
        state_up, state_lo = np.full(state_shape, pyrado.inf), np.full(
            state_shape, -pyrado.inf)
        # Ensure that joint limits of the arm are not reached (5 deg safety margin)
        state_up[:self.num_dof] = np.array([
            2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7
        ])[:self.num_dof] - 5 * np.pi / 180
        state_lo[:self.num_dof] = np.array([
            -2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7
        ])[:self.num_dof] + 5 * np.pi / 180
        self._state_space = BoxSpace(state_lo, state_up)

        # Torque space
        max_torque = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0,
                               2.0])[:self.num_dof]
        self._torque_space = BoxSpace(-max_torque, max_torque)

        # Action space (PD controller on joint positions and velocities)
        if self.num_dof == 4:
            self._act_space = act_space_wam_4dof
        elif self.num_dof == 7:
            self._act_space = act_space_wam_7dof

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['t'])
示例#5
0
    def __init__(self,
                 task_args: dict,
                 ref_frame: str,
                 position_mps: bool,
                 mps_left: [Sequence[dict], None],
                 fixed_init_state: bool = False,
                 **kwargs):
        """
        Constructor

        .. note::
            This constructor should only be called via the subclasses.

        :param task_args: arguments for the task construction
        :param ref_frame: reference frame for the MPs, e.g. 'world', 'box', or 'upperGoal'
        :param mps_left: left arm's movement primitives holding the dynamical systems and the goal states
        :param position_mps: `True` if the MPs are defined on position level, `False` if defined on velocity level
        :param fixed_init_state: use an init state space with only one state (e.g. for debugging)
        :param kwargs: keyword arguments which are available for all task-based `RcsSim`
                       taskCombinationMethod: str = 'mean',  # 'sum', 'mean',  'product', or 'softmax'
                       checkJointLimits: bool = False,
                       collisionAvoidanceIK: bool = True,
                       observeVelocities: bool = True,
                       observeCollisionCost: bool = True,
                       observePredictedCollisionCost: bool = False,
                       observeManipulabilityIndex: bool = False,
                       observeCurrentManipulability: bool = True,
                       observeDynamicalSystemDiscrepancy: bool = False,
                       observeTaskSpaceDiscrepancy: bool = True,
                       observeForceTorque: bool = True
        """
        Serializable._init(self, locals())

        # Forward to the RcsSim's constructor
        RcsSim.__init__(
            self,
            envType='BoxShelving',
            extraConfigDir=osp.join(rcsenv.RCSPYSIM_CONFIG_PATH, 'BoxShelving'),
            hudColor='BLACK_RUBBER',
            task_args=task_args,
            refFrame=ref_frame,
            positionTasks=position_mps,
            tasksLeft=mps_left,
            **kwargs
        )

        # Initial state space definition
        if fixed_init_state:
            dafault_init_state = np.array([0., 0., 0., 0.8, 30.*np.pi/180, 90.*np.pi/180])  # [m, m, rad, m, rad, rad]
            self._init_space = SingularStateSpace(dafault_init_state,
                                                  labels=['$x$', '$y$', '$th$', '$z$', '$q_2$', '$q_4$'])
        else:
            min_init_state = np.array([-0.02, -0.02, -3.*np.pi/180., 0.78, 27.*np.pi/180, 77.*np.pi/180])
            max_init_state = np.array([0.02, 0.02, 3.*np.pi/180., 0.82, 33.*np.pi/180, 83.*np.pi/180])
            self._init_space = BoxSpace(min_init_state, max_init_state,  # [m, m, rad, m, rad, rad]
                                        labels=['$x$', '$y$', '$th$', '$z$', '$q_2$', '$q_4$'])
示例#6
0
    def __init__(self,
                 task_args: dict,
                 max_dist_force: float = None,
                 **kwargs):
        """
        Constructor

        .. note::
            This constructor should only be called via the subclasses.

        :param task_args: arguments for the task construction
        :param max_dist_force: maximum disturbance force, set to None (default) for no disturbance
        :param kwargs: keyword arguments forwarded to `RcsSim`
                       collisionConfig: specification of the Rcs CollisionModel
        """
        Serializable._init(self, locals())

        if kwargs.get('collisionConfig', None) is None:
            collision_config = {
                'pairs': [
                    {
                        'body1': 'Effector',
                        'body2': 'Link2'
                    },
                    {
                        'body1': 'Effector',
                        'body2': 'Link1'
                    },
                ],
                'threshold':
                0.15,
                'predCollHorizon':
                20
            }
        else:
            collision_config = kwargs.get('collisionConfig')

        # Forward to the RcsSim's constructor, nothing more needs to be done here
        RcsSim.__init__(self,
                        envType='Planar3Link',
                        graphFileName='gPlanar3Link.xml',
                        task_args=task_args,
                        collisionConfig=collision_config,
                        **kwargs)

        # Initial state space definition
        upright_init_state = np.array([0.1, 0.1, 0.1])  # [rad, rad, rad]
        self._init_space = SingularStateSpace(
            upright_init_state, labels=['$q_1$', '$q_2$', '$q_3$'])

        # Setup disturbance
        self._max_dist_force = max_dist_force
示例#7
0
    def _create_spaces(self):
        # Define the spaces
        max_state = np.array([4 * np.pi, 4 * np.pi])  # [rad, rad/s]
        max_obs = np.array([1.0, 1.0, np.inf])  # [-, -, rad/s]
        tau_max = self.domain_param["torque_thold"]

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=["theta", "theta_dot"])
        self._obs_space = BoxSpace(
            -max_obs, max_obs, labels=["sin_theta", "cos_theta", "theta_dot"])
        self._init_space = SingularStateSpace(self._init_state,
                                              labels=["theta", "theta_dot"])
        self._act_space = BoxSpace(-tau_max, tau_max, labels=["tau"])
示例#8
0
    def _create_spaces(self):
        # Torque space
        max_torque = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])
        self._torque_space = BoxSpace(-max_torque, max_torque)

        # Initial state space
        # Set the actual stable initial position. This position would be reached after some time using the internal
        # PD controller to stabilize at self.init_pose_des
        # An initial qpos measured on real Barret WAM:
        #   [-3.6523e-05  6.4910e-01  4.4244e-03  1.4211e+00  8.8864e-03 -2.7763e-01 -1.5309e+00]
        self.init_qpos[:7] = np.array([0., 0.65, 0., 1.41, 0., -0.28, -1.57])
        # Set the angle of the first rope segment relative to the cup bottom plate
        self.init_qpos[7] = -0.21
        # The initial position of the ball in cartesian coordinates
        init_ball_pos = np.array([0., -0.8566, 0.85391])
        init_state = np.concatenate(
            [self.init_qpos, self.init_qvel, init_ball_pos])
        if self.fixed_initial_state:
            self._init_space = SingularStateSpace(init_state)
        else:
            # Add plus/minus one degree to each motor joint and the first rope segment joint
            init_state_up = init_state.copy()
            init_state_up[:8] += 1 * np.pi / 180
            init_state_lo = init_state.copy()
            init_state_lo[:8] -= 1 * np.pi / 180
            self._init_space = BoxSpace(init_state_lo, init_state_up)

        # State space
        state_shape = init_state.shape
        max_state = np.full(state_shape, pyrado.inf)
        self._state_space = BoxSpace(-max_state, max_state)

        # Action space (PD controller on 3 joint positions and velocities)
        act_up = np.array(
            [1.985, np.pi, np.pi / 2, 10 * np.pi, 10 * np.pi, 10 * np.pi])
        act_lo = np.array(
            [-1.985, -0.9, -np.pi / 2, -10 * np.pi, -10 * np.pi, -10 * np.pi])
        self._act_space = BoxSpace(
            act_lo,
            act_up,  # [rad, rad, rad, rad/s, rad/s, rad/s]
            labels=[
                r'$q_{1,des}$', r'$q_{3,des}$', r'$q_{5,des}$',
                r'$\dot{q}_{1,des}$', r'$\dot{q}_{3,des}$',
                r'$\dot{q}_{5,des}$'
            ])

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['$t$'])
示例#9
0
    def __init__(self):
        """Constructor"""
        Serializable._init(self, locals())

        # Call SimEnv's constructor
        super().__init__(dt=1.0, max_steps=1)

        # Initialize the domain parameters and the derived constants
        self._domain_param = self.get_nominal_domain_param()
        self._mean, self._covariance_matrix = self.calc_constants(self.domain_param)

        self._init_space = SingularStateSpace(np.zeros(self.state_space.shape))

        # Define a dummy task including the reward function
        self._task = self._create_task()
示例#10
0
def test_velocity_filter(plot: bool):
    # Set up environment
    env_gt = QQubeSwingUpSim(dt=1 / 500.0, max_steps=350)
    env_gt.init_space = SingularStateSpace(np.array([0.1, np.pi / 2, 3.0, 0]))
    env_filt = ObsVelFiltWrapper(env_gt,
                                 idcs_pos=["theta", "alpha"],
                                 idcs_vel=["theta_dot", "alpha_dot"])

    # Set up policy
    policy = IdlePolicy(env_gt.spec)

    # Simulate
    ro_gt = rollout(env_gt, policy)
    ro_filt = rollout(env_filt, policy)

    # Filter the observations of the last rollout
    theta_dot_gt = ro_gt.observations[:, 4]
    alpha_dot_gt = ro_gt.observations[:, 5]
    theta_dot_filt = ro_filt.observations[:, 4]
    alpha_dot_filt = ro_filt.observations[:, 5]

    assert theta_dot_filt[0] != pytest.approx(
        theta_dot_gt[0])  # can't be equal since we set an init vel of 3 rad/s
    assert alpha_dot_filt[0] == pytest.approx(alpha_dot_gt[0], abs=1e-4)

    # Compute the error
    rmse_theta = rmse(theta_dot_gt, theta_dot_filt)
    rmse_alpha = rmse(alpha_dot_gt, alpha_dot_filt)

    if plot:
        from matplotlib import pyplot as plt

        # Plot the filtered signals versus the orignal observations
        plt.rc("text", usetex=True)
        fix, axs = plt.subplots(2, figsize=(16, 9))
        axs[0].plot(theta_dot_gt, label=r"$\dot{\theta}_{true}$")
        axs[0].plot(theta_dot_filt, label=r"$\dot{\theta}_{filt}$")
        axs[1].plot(alpha_dot_gt, label=r"$\dot{\alpha}_{true}$")
        axs[1].plot(alpha_dot_filt, label=r"$\dot{\alpha}_{filt}$")

        axs[0].set_title(rf"RMSE($\theta$): {rmse_theta}")
        axs[0].set_ylabel(r"$\dot{\theta}$ [rad/s]")
        axs[0].legend()
        axs[1].set_title(rf"RMSE($\alpha$): {rmse_alpha}")
        axs[1].set_xlabel("time steps")
        axs[1].set_ylabel(r"$\dot{\alpha}$ [rad/s]")
        axs[1].legend()
        plt.show()
示例#11
0
    def _create_spaces(self):
        tau_max = self.domain_param['tau_max']

        # Define the spaces
        max_state = np.array([4 * np.pi, 4 * np.pi])  # [rad, rad/s]
        max_obs = np.array([1., 1., np.inf])  # [-, -, rad/s]
        init_state = np.zeros(2)  # [rad, rad/s]

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=['theta', 'theta_dot'])
        self._obs_space = BoxSpace(
            -max_obs, max_obs, labels=['sin_theta', 'cos_theta', 'theta_dot'])
        self._init_space = SingularStateSpace(init_state,
                                              labels=['theta', 'theta_dot'])
        self._act_space = BoxSpace(-tau_max, tau_max, labels=['tau'])
示例#12
0
    def __init__(
        self,
        num_dof: int,
        frame_skip: int = 4,
        dt: Optional[float] = None,
        max_steps: int = pyrado.inf,
        task_args: Optional[dict] = None,
    ):
        """
        Constructor

        :param num_dof: number of degrees of freedom (4 or 7), depending on which Barrett WAM setup being used
        :param frame_skip: number of simulation frames for which the same action is held, results in a multiplier of
                           the time step size `dt`
        :param dt: by default the time step size is the one from the mujoco config file multiplied by the number of
                   frame skips (legacy from OpenAI environments). By passing an explicit `dt` value, this can be
                   overwritten. Possible use case if if you know that you recorded a trajectory with a specific `dt`.
        :param max_steps: max number of simulation time steps
        :param task_args: arguments for the task construction
        """
        Serializable._init(self, locals())

        # Initialize num DoF specific variables
        if num_dof == 4:
            graph_file_name = "wam_4dof_base.xml"
            self.qpos_des_init = INIT_QPOS_DES_4DOF
            self.p_gains = WAM_PGAINS_4DOF
            self.d_gains = WAM_DGAINS_4DOF
        elif num_dof == 7:
            graph_file_name = "wam_7dof_base.xml"
            self.qpos_des_init = INIT_QPOS_DES_7DOF
            self.p_gains = WAM_PGAINS_7DOF
            self.d_gains = WAM_DGAINS_7DOF
        else:
            raise pyrado.ValueErr(given=num_dof, eq_constraint="4 or 7")

        model_path = osp.join(pyrado.MUJOCO_ASSETS_DIR, graph_file_name)
        super().__init__(num_dof, model_path, frame_skip, dt, max_steps,
                         task_args)

        # Fixed initial state space
        init_state = np.concatenate([self.init_qpos, self.init_qvel])
        self._init_space = SingularStateSpace(init_state)
示例#13
0
    def __init__(self,
                 mps: Sequence[dict] = None,
                 task_args: dict = None,
                 max_dist_force: float = None,
                 position_mps: bool = True,
                 **kwargs):
        """
        Constructor

        :param mps: movement primitives holding the dynamical systems and the goal states
        :param task_args: arguments for the task construction
        :param max_dist_force: maximum disturbance force, set to None (default) for no disturbance
        :param position_mps: `True` if the MPs are defined on position level, `False` if defined on velocity level,
                             only matters if `actionModelType='activation'`
        :param kwargs: keyword arguments forwarded to `RcsSim`
                       taskCombinationMethod='sum'  # or 'mean' or 'product'
        """
        Serializable._init(self, locals())

        # Forward to the RcsSim's constructor, nothing more needs to be done here
        RcsSim.__init__(
            self,
            envType='MPBlending',
            task_args=task_args,
            actionModelType='activation',
            tasks=mps,
            positionTasks=position_mps,
            **kwargs
        )

        # Store Planar3Link specific vars
        center_init_state = np.array([0., 0.])  # [m]
        self._init_space = SingularStateSpace(center_init_state, labels=['$x$', '$y$'])

        # Setup disturbance
        self._max_dist_force = max_dist_force
示例#14
0
    def __init__(self,
                 task_args: dict,
                 collision_config: dict = None,
                 max_dist_force: float = None,
                 **kwargs):
        """
        Constructor

        .. note::
            This constructor should only be called via the subclasses.

        :param task_args: arguments for the task construction
        :param max_dist_force: maximum disturbance force, set to None (default) for no disturbance
        :param kwargs: keyword arguments forwarded to `RcsSim`
                       collisionConfig: specification of the Rcs CollisionModel
        """
        Serializable._init(self, locals())

        # Forward to the RcsSim's constructor, nothing more needs to be done here
        RcsSim.__init__(self,
                        envType='PlanarInsert',
                        physicsConfigFile='pPlanarInsert.xml',
                        task_args=task_args,
                        collisionConfig=collision_config,
                        **kwargs)

        if kwargs.get('collisionConfig', None) is None:
            collision_config = {
                'pairs': [
                    {
                        'body1': 'Effector',
                        'body2': 'Link3'
                    },
                    {
                        'body1': 'Effector',
                        'body2': 'Link2'
                    },
                    {
                        'body1': 'UpperWall',
                        'body2': 'Link4'
                    },
                    {
                        'body1': 'LowerWall',
                        'body2': 'Link4'
                    },
                    {
                        'body1': 'LowerWall',
                        'body2': 'Link3'
                    },
                    {
                        'body1': 'LowerWall',
                        'body2': 'Link2'
                    },
                ],
                'threshold':
                0.05
            }
        else:
            collision_config = kwargs.get('collisionConfig')

        # Initial state space definition
        init_state = np.array([-40, 30, 30, 30, -30
                               ]) / 180 * np.pi  # [rad, rad, rad]
        self._init_space = SingularStateSpace(
            init_state, labels=['$q_1$', '$q_2$', '$q_3$', '$q_4$', '$q_5$'])

        # Setup disturbance
        self._max_dist_force = max_dist_force
示例#15
0
class RosenSim(SimEnv, Serializable):
    """ This environment wraps the Rosenbrock function to use it as a test case for Pyrado algorithms. """

    name: str = 'rosen'

    def __init__(self):
        """ Constructor """
        Serializable._init(self, locals())

        # Initialize basic variables
        super().__init__(dt=None, max_steps=1)

        # Set the bounds for the system's states adn actions
        max_state = np.array([100., 100.])
        max_act = max_state
        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=['x_1', 'x_2'])
        self._init_space = SingularStateSpace(np.zeros(
            self._state_space.shape),
                                              labels=['x_1_init', 'x_2_init'])
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=['x_1_next', 'x_2_next'])

        # Define the task including the reward function
        self._task = self._create_task()

        # Animation with pyplot
        self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])

    @property
    def state_space(self):
        return self._state_space

    @property
    def obs_space(self):
        return self._state_space

    @property
    def init_space(self):
        return self._init_space

    @property
    def act_space(self):
        return self._act_space

    def _create_task(self, task_args: dict = None) -> OptimProxyTask:
        return OptimProxyTask(self.spec,
                              StateBasedRewFcn(rosenbrock, flip_sign=True))

    @property
    def task(self) -> OptimProxyTask:
        return self._task

    @property
    def domain_param(self):
        return {}

    @domain_param.setter
    def domain_param(self, param):
        pass

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return {}

    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            self.state = self._init_space.sample_uniform()  # zero
        else:
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state,
                                      expected_match=self.obs_space)
            if isinstance(init_state, np.ndarray):
                self.state = init_state.copy()
            else:
                try:
                    self.state = np.array(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state,
                                         expected_type=[np.ndarray, list])

        # No need to reset the task

        # Return perfect observation
        return self.observe(self.state)

    def step(self, act):
        # Apply actuator limits
        act = self.limit_act(act)

        # Action equal selection a new state a.k.a. solution of the optimization problem
        self.state = act

        # Current reward depending on the state after the step (since there is only one step)
        self._curr_rew = self.task.step_rew(self.state)

        self._curr_step += 1

        # Check if the task or the environment is done
        done = self.task.is_done(self.state)
        if self._curr_step >= self._max_steps:
            done = True

        return self.observe(self.state), self._curr_rew, done, {}

    def render(self, mode: RenderMode, render_step: int = 1):
        # Call base class
        super().render(mode)

        # Print to console
        if mode.text:
            if self._curr_step % render_step == 0 and self._curr_step > 0:  # skip the render before the first step
                print(
                    "step: {:3}  |  r_t: {: 1.3f}  |  a_t: {}\t |  s_t+1: {}".
                    format(self._curr_step, self._curr_rew, self._curr_act,
                           self.state))

        # Render using pyplot
        if mode.video:
            from matplotlib import pyplot as plt
            from pyrado.plotting.surface import draw_surface

            plt.ion()

            if self._anim['fig'] is None:
                # Plot Rosenbrock function once if not already plotted
                x = np.linspace(-2, 2, 20, True)
                y = np.linspace(-1, 3, 20, True)
                self._anim['fig'] = draw_surface(x, y, rosenbrock, 'x', 'y',
                                                 'z')

            self._anim['trace_x'].append(self.state[0])
            self._anim['trace_y'].append(self.state[1])
            self._anim['trace_z'].append(rosenbrock(self.state))

            ax = self._anim['fig'].gca()
            ax.scatter(self._anim['trace_x'],
                       self._anim['trace_y'],
                       self._anim['trace_z'],
                       s=8,
                       c='w')

            plt.draw()
示例#16
0
class CatapultSim(SimEnv, Serializable):
    """
    In this special environment, the action is equal to the policy parameter. Therefore, it makes only sense to
    use it in combination with a linear policy that has only one constant feature.
    """

    name: str = "cata"

    def __init__(self, max_steps: int, example_config: bool):
        """
        Constructor

        :param max_steps: maximum number of simulation steps
        :param example_config: configuration for the 'illustrative example' in the journal
        """
        Serializable._init(self, locals())

        super().__init__(dt=1, max_steps=max_steps)

        self.example_config = example_config
        self._planet = -1

        # Initialize the domain parameters (Earth)
        self._g = CatapultSim.get_nominal_domain_param()[
            "gravity_const"]  # planet's gravity constant [m/s**2]
        self._k = CatapultSim.get_nominal_domain_param()[
            "stiffness"]  # catapult spring's stiffness constant [N/m]
        self._x = CatapultSim.get_nominal_domain_param()[
            "elongation"]  # catapult spring's pre-elongation [m]

        # Domain independent parameter
        self._m = 70.0  # victim's mass [kg]

        # Set the bounds for the system's states adn actions
        max_state = np.array([1000.0])  # [m], arbitrary but >> self._x
        max_act = max_state
        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state, labels=["h"])
        self._init_space = SingularStateSpace(np.zeros(
            self._state_space.shape),
                                              labels=["h_0"])
        self._act_space = BoxSpace(-max_act, max_act, labels=["theta"])

        # Define the task including the reward function
        self._task = self._create_task(task_args=dict())

    @property
    def state_space(self):
        return self._state_space

    @property
    def obs_space(self):
        return self._state_space

    @property
    def init_space(self):
        return self._init_space

    @property
    def act_space(self):
        return self._act_space

    def _create_task(self, task_args: dict) -> DesStateTask:
        # Define the task including the reward function
        state_des = task_args.get("state_des",
                                  np.zeros(self._state_space.shape))
        return DesStateTask(self.spec,
                            state_des,
                            rew_fcn=AbsErrRewFcn(q=np.array([1.0]),
                                                 r=np.array([0.0])))

    @property
    def task(self):
        return self._task

    @property
    def domain_param(self) -> dict:
        if self.example_config:
            return dict(planet=self._planet)
        else:
            return dict(gravity_const=self._g, k=self._k, x=self._x)

    @domain_param.setter
    def domain_param(self, domain_param: dict):
        assert isinstance(domain_param, dict)
        # Set the new domain params if given, else the default value
        if self.example_config:
            if domain_param["planet"] == 0:
                # Mars
                self._g = 3.71
                self._k = 1e3
                self._x = 0.5
            elif domain_param["planet"] == 1:
                # Venus
                self._g = 8.87
                self._k = 3e3
                self._x = 1.5
            elif domain_param["planet"] == -1:
                # Default value which should make the computation invalid
                self._g = None
                self._k = None
                self._x = None
            else:
                raise pyrado.ValueErr(given=domain_param["planet"],
                                      eq_constraint="0 or 1")

        else:
            assert self._g > 0 and self._k > 0 and self._x > 0
            self._g = domain_param.get("gravity_const", self._g)
            self._k = domain_param.get("stiffness", self._k)
            self._x = domain_param.get("elongation", self._x)

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return dict(gravity_const=9.81, stiffness=2000.0, elongation=1.0)

    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            # Sample from the init state space
            self.state = self._init_space.sample_uniform()
        else:
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state,
                                      expected_match=self.obs_space)
            if isinstance(init_state, np.ndarray):
                self.state = init_state.copy()
            else:
                try:
                    self.state = np.asarray(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state,
                                         expected_type=np.ndarray)

        # Reset the task
        self._task.reset(env_spec=self.spec)

        # Return perfect observation
        return self.observe(self.state)

    def step(self, act: np.ndarray) -> tuple:
        # Apply actuator limits
        act = self.limit_act(act)  # dummy for CatapultSim
        self._curr_act = act  # just for the render function

        # Calculate the maximum height of the flight trajectory ("one step dynamics")
        self.state = self._k / (2.0 * self._m *
                                self._g) * (act - self._x)**2  # h(theta, xi)

        # Current reward depending on the state after the step (since there is only one step) and the (unlimited) action
        self._curr_rew = self.task.step_rew(self.state, act, self._curr_step)

        self._curr_step += 1

        # Check if the task or the environment is done
        done = self._task.is_done(self.state)
        if self._curr_step >= self._max_steps:
            done = True

        if done:
            # Add final reward if done
            remaining_steps = self._max_steps - (
                self._curr_step +
                1) if self._max_steps is not pyrado.inf else 0
            self._curr_rew += self._task.final_rew(self.state, remaining_steps)

        return self.observe(self.state), self._curr_rew, done, {}

    def render(self, mode: RenderMode, render_step: int = 1):
        # Call base class
        super().render(mode)

        # Print to console
        if mode.text:
            if self._curr_step % render_step == 0 and self._curr_step > 0:  # skip the render before the first step
                print(
                    f"step: {self._curr_step:4d}  |  r_t: {self._curr_rew: 1.3f}  |  a_t: {self._curr_act}  |  s_t+1: {self.state}"
                )
示例#17
0
def test_pair_plot(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    prob_labels: Optional[str],
    use_prior: bool,
    use_trafo: bool,
):
    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(
            env,
            policy,
            eval=True,
            reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(
            ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 35, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = SqrtDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, 20, 0.2]),
                                high=to.tensor([1.5, 40, 0.8]))

    # Learn a likelihood from the simulator
    density_estimator = sbiutils.posterior_nn(model="maf",
                                              hidden_features=10,
                                              num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(simulator=simulator,
                                              proposal=prior,
                                              num_simulations=50,
                                              num_workers=1)
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter
    domain_param_gt = to.tensor(
        [env_real.domain_param[key] for _, key in dp_mapping.items()])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 5
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    # Get a (random) condition
    condition = Embedding.pack(domain_param_gt.clone())

    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    if use_prior:
        grid_bounds = None
    else:
        prior = None
        grid_bounds = to.cat(
            [to.zeros((len(dp_mapping), 1)),
             to.ones((len(dp_mapping), 1))],
            dim=1)

    _, axs = plt.subplots(num_rows,
                          num_cols,
                          figsize=(14, 14),
                          tight_layout=True)
    fig = draw_posterior_pairwise_heatmap(
        axs,
        posterior,
        data_real,
        dp_mapping,
        condition,
        prior=prior,
        env_real=env_real,
        marginal_layout=layout,
        grid_bounds=grid_bounds,
        grid_res=100,
        normalize_posterior=False,
        rescale_posterior=True,
        labels=None if labels is None else [""] * len(dp_mapping),
        prob_labels=prob_labels,
    )

    assert fig is not None
示例#18
0
def test_pair_plot_scatter(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    legend_labels: Optional[str],
    axis_limits: Optional[str],
    use_kde: bool,
    use_trafo: bool,
):
    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(
            env,
            policy,
            eval=True,
            reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(
            ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 15, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = LogDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    k_low = np.log(10) if use_trafo else 10
    k_up = np.log(20) if use_trafo else 20
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, k_low, 0.2]),
                                high=to.tensor([1.5, k_up, 0.8]))

    # Learn a likelihood from the simulator
    density_estimator = sbiutils.posterior_nn(model="maf",
                                              hidden_features=10,
                                              num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(simulator=simulator,
                                              proposal=prior,
                                              num_simulations=50,
                                              num_workers=1)
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter
    domain_param_gt = to.tensor([
        env_real.domain_param[dp_mapping[key]]
        for key in sorted(dp_mapping.keys())
    ])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 10
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    domain_params, log_probs = SBIBase.eval_posterior(
        posterior,
        data_real,
        num_samples=6,
        normalize_posterior=False,
        subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=False),
    )
    dp_samples = [
        domain_params.reshape(1, -1, domain_params.shape[-1]).squeeze()
    ]

    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    _, axs = plt.subplots(num_rows,
                          num_cols,
                          figsize=(8, 8),
                          tight_layout=True)
    fig = draw_posterior_pairwise_scatter(
        axs=axs,
        dp_samples=dp_samples,
        dp_mapping=dp_mapping,
        prior=prior if axis_limits == "use_prior" else None,
        env_sim=env,
        env_real=env_real,
        axis_limits=axis_limits,
        marginal_layout=layout,
        labels=labels,
        legend_labels=legend_labels,
        use_kde=use_kde,
    )
    assert fig is not None
示例#19
0
 def act_space(self):
     return SingularStateSpace(np.zeros(1))
示例#20
0
    def __init__(
        self,
        num_dof: int,
        frame_skip: int = 4,
        dt: Optional[float] = None,
        max_steps: int = pyrado.inf,
        fixed_init_state: bool = True,
        stop_on_collision: bool = True,
        observe_ball: bool = False,
        observe_cup: bool = False,
        task_args: Optional[dict] = None,
    ):
        """
        Constructor

        :param num_dof: number of degrees of freedom (4 or 7), depending on which Barrett WAM setup being used
        :param frame_skip: number of simulation frames for which the same action is held, results in a multiplier of
                           the time step size `dt`
        :param dt: by default the time step size is the one from the mujoco config file multiplied by the number of
                   frame skips (legacy from OpenAI environments). By passing an explicit `dt` value, this can be
                   overwritten. Possible use case if if you know that you recorded a trajectory with a specific `dt`.
        :param max_steps: max number of simulation time steps
        :param fixed_init_state: enables/disables deterministic, fixed initial state
        :param stop_on_collision: set the `failed` flag in the dict returned by `_mujoco_step()` to true, if the ball
                                  collides with something else than the desired parts of the cup. This causes the
                                  episode to end. Keep in mind that in case of a negative step reward and no final
                                  cost on failing, this might result in undesired behavior.
        :param observe_ball: if `True`, include the 2-dim (x-z plane) cartesian ball position into the observation
        :param observe_cup: if `True`, include the 2-dim (x-z plane) cartesian cup position into the observation
        :param task_args: arguments for the task construction
        """
        Serializable._init(self, locals())

        self.fixed_init_state = fixed_init_state
        self.observe_ball = observe_ball
        self.observe_cup = observe_cup

        # Initialize num DoF specific variables
        if num_dof == 4:
            graph_file_name = "wam_4dof_bic.xml"
            self.qpos_des_init = INIT_QPOS_DES_4DOF
            self.p_gains = WAM_PGAINS_4DOF
            self.d_gains = WAM_DGAINS_4DOF
            init_ball_pos = np.array([0.723, 0.0, 1.168])
            init_cup_goal = GOAL_POS_INIT_SIM_4DOF
        elif num_dof == 7:
            graph_file_name = "wam_7dof_bic.xml"
            self.qpos_des_init = INIT_QPOS_DES_7DOF
            self.p_gains = WAM_PGAINS_7DOF
            self.d_gains = WAM_DGAINS_7DOF
            init_ball_pos = np.array([0.828, 0.0, 1.131])
            init_cup_goal = GOAL_POS_INIT_SIM_7DOF
        else:
            raise pyrado.ValueErr(given=num_dof, eq_constraint="4 or 7")

        model_path = osp.join(pyrado.MUJOCO_ASSETS_DIR, graph_file_name)
        super().__init__(num_dof, model_path, frame_skip, dt, max_steps,
                         task_args)

        # Actual initial joint position (when the WAM moved to the home position)
        if num_dof == 4:
            self.init_qpos[:4] = np.array([0.0, 0.63, 0.0, 1.27])
            self.init_qpos[
                4] = -0.34  # angle of the first rope segment relative to the cup bottom plate
        else:
            self.init_qpos[:7] = np.array(
                [0.0, 0.65, 0.0, 1.41, 0.0, -0.28, -1.57])
            self.init_qpos[
                7] = -0.21  # angle of the first rope segment relative to the cup bottom plate

        # Set the actual stable initial position. This position would be reached after some time using the internal
        # PD controller to stabilize at self._qpos_des_init.
        # The initial position of the ball in cartesian coordinates
        self._init_state = np.concatenate(
            [self.init_qpos, self.init_qvel, init_ball_pos, init_cup_goal])
        if self.fixed_init_state:
            self._init_space = SingularStateSpace(self._init_state)
        else:
            # Add plus/minus one degree to each motor joint and the first rope segment joint
            init_state_up = self._init_state.copy()
            init_state_up[:self._num_dof] += np.pi / 180 * np.array(
                [0.1, 1, 0.5, 1.0, 0.1, 1.0, 1.0])[:self._num_dof]
            init_state_lo = self._init_state.copy()
            init_state_lo[:self._num_dof] -= np.pi / 180 * np.array(
                [0.1, 1, 0.5, 1.0, 0.1, 1.0, 1.0])[:self._num_dof]
            self._init_space = BoxSpace(init_state_lo, init_state_up)

        # Bodies to check fo collision
        self._collision_bodies = [
            "wam/base_link",
            "wam/shoulder_yaw_link",
            "wam/shoulder_pitch_link",
            "wam/upper_arm_link",
            "wam/forearm_link",
            "wrist_palm_link",
            "wam/wrist_pitch_link",
            "wam/wrist_yaw_link",
        ]
        if self._num_dof == 4:
            self._collision_bodies = self._collision_bodies[:6]

        # We access a private attribute since a method like 'model.geom_names[geom_id]' cannot be used because
        # not every geom has a name
        self._collision_geom_ids = [
            self.model._geom_name2id[name]
            for name in ["cup_geom1", "cup_geom2"]
        ]
        self.stop_on_collision = stop_on_collision

        self.camera_config.update(dict(distance=2.7))