Python ExpQuadrErrRewFcn示例，pyrado.tasks.reward_functions.ExpQuadrErrRewFcn Python示例

示例#1

0

显示文件

文件： predefined.py 项目： fdamken/SimuRLacra

def create_home_pos_task(env_spec: EnvSpec, obs_labels: Sequence[str],
                         state_des: np.ndarray) -> MaskedTask:
    """
    Build a masked desired-state task for moving the robot to a safe position.

    .. note::
        Designed with an RcsPySim environment in mind, but not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels selecting the considered states, e.g. ['PowerGrasp_R_Y', 'PowerGrasp_R_Z']. They
                       need to match the observations' names in RcsPySim
    :param state_des: desired state (depends on the coordinate system). If reached, the task is over.
    :return: masked task that only considers a subspace of all observations
    """
    # Reduce the state space to the entries selected by the labels
    selection_mask = env_spec.state_space.create_mask(obs_labels)
    masked_state_space = env_spec.state_space.subspace(selection_mask)
    masked_spec = EnvSpec(env_spec.obs_space, env_spec.act_space,
                          masked_state_space)

    # Weight matrices of the reward function: Q is sized by the desired state, R by the action space
    num_act = masked_spec.act_space.flat_dim
    rew_fcn = ExpQuadrErrRewFcn(1e1 * np.eye(len(state_des)),
                                1e-1 * np.eye(num_act))
    inner_task = DesStateTask(masked_spec, state_des, rew_fcn)

    # Wrap the inner task such that it only sees the selected states
    return MaskedTask(env_spec, inner_task, obs_labels)

示例#2

0

显示文件

文件： quanser_qube.py 项目： fdamken/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        """
        Create the task including its reward function.

        :param task_args: optional overrides; the keys read here are "state_des", "Q", and "R"
        :return: `RadiallySymmDesStateTask` with an `ExpQuadrErrRewFcn`
        """
        # Every entry falls back to a default if it is not provided by the caller
        goal_state = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))
        q_mat = task_args.get("Q", np.diag([3.0, 4.0, 2.0, 2.0]))
        r_mat = task_args.get("R", np.diag([5e-2]))
        rew_fcn = ExpQuadrErrRewFcn(q_mat, r_mat)

        # idcs=[1] is forwarded unchanged; presumably it marks the angular state dimension (confirm against
        # RadiallySymmDesStateTask)
        return RadiallySymmDesStateTask(self.spec, goal_state, rew_fcn, idcs=[1])

示例#3

0

显示文件

文件： target_tracking.py 项目： arlene-kuehn/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        """
        Set up the task. The distance to the goal is tracked for both hands separately.

        :param task_args: options; the keys read here are 'continuous_rew_fcn' (default `True`), 'mps_left',
                          and 'mps_right'. One sub-task is created per entry of 'mps_left' and 'mps_right'.
        :return: `ParallelTasks` holding one `SequentialTasks` per hand
        """
        use_continuous = task_args.get('continuous_rew_fcn', True)
        left_mps = task_args.get('mps_left')
        right_mps = task_args.get('mps_right')

        if use_continuous:
            Q = np.diag([1, 1e-3])
            R = 1e-4 * np.eye(self.act_space.flat_dim)

            def make_rew_fcn():
                # A new reward function object for every sub-task
                return ExpQuadrErrRewFcn(Q, R)
        else:
            make_rew_fcn = MinusOnePerStepRewFcn
        success_threshold = 7.5e-2

        def make_goal_task(idx):
            # One goal-distance task for the given index
            return create_goal_dist_distvel_task(self.spec, idx,
                                                 make_rew_fcn(),
                                                 success_threshold)

        # The indices of the right hand's tasks continue after those of the left hand
        num_left = len(left_mps)
        left_tasks = [make_goal_task(k) for k in range(num_left)]
        right_tasks = [
            make_goal_task(num_left + k) for k in range(len(right_mps))
        ]

        per_hand = [
            SequentialTasks(left_tasks, hold_rew_when_done=use_continuous),
            SequentialTasks(right_tasks, hold_rew_when_done=use_continuous),
        ]
        return ParallelTasks(per_hand, hold_rew_when_done=use_continuous)

示例#4

0

显示文件

def create_box_lift_task(env_spec: EnvSpec, continuous_rew_fcn: bool,
                         succ_thold: float):
    """
    Create a masked desired-state task that only considers the 'Box_Z' state.

    :param env_spec: environment specification
    :param continuous_rew_fcn: if `True`, use an `ExpQuadrErrRewFcn`, otherwise a `MinusOnePerStepRewFcn`
    :param succ_thold: forwarded as `thold_dist` to `proximity_succeeded`
    :return: `MaskedTask` wrapping the `DesStateTask`
    """
    # The selection label needs to match the observation's name in RcsPySim
    labels = ['Box_Z']

    # Environment specification restricted to the selected state
    selection_mask = env_spec.state_space.create_mask(labels)
    masked_spec = EnvSpec(env_spec.obs_space, env_spec.act_space,
                          env_spec.state_space.subspace(selection_mask))

    # The box position is measured in world coordinates (a value of 0.3 relative to the table was used before)
    state_des = np.array([1.1])
    if not continuous_rew_fcn:
        rew_fcn = MinusOnePerStepRewFcn()
    else:
        rew_fcn = ExpQuadrErrRewFcn(
            np.diag([3e1]), 1e0 * np.eye(masked_spec.act_space.flat_dim))

    success_fcn = functools.partial(proximity_succeeded,
                                    thold_dist=succ_thold)
    inner_task = DesStateTask(masked_spec, state_des, rew_fcn, success_fcn)

    # Wrap the inner task such that it only sees the selected state
    return MaskedTask(env_spec, inner_task, labels)

示例#5

0

显示文件

def create_box_upper_shelve_task(env_spec: EnvSpec, continuous_rew_fcn: bool,
                                 succ_thold: float):
    """
    Create a masked desired-state task that only considers the six box states.

    :param env_spec: environment specification
    :param continuous_rew_fcn: if `True`, use an `ExpQuadrErrRewFcn`, otherwise a `MinusOnePerStepRewFcn`
    :param succ_thold: forwarded as `thold_dist` to `proximity_succeeded`
    :return: `MaskedTask` wrapping the `DesStateTask`
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_X', 'Box_Y', 'Box_Z', 'Box_A', 'Box_B', 'Box_C']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    state_des = np.zeros(
        6)  # zeros since we observe the box position relative to the goal
    if continuous_rew_fcn:
        Q = np.diag([5e0, 5e0, 5e0, 1e-1, 1e-1, 1e-1])
        R = 5e-2 * np.eye(spec.act_space.flat_dim)
        rew_fcn = ExpQuadrErrRewFcn(Q, R)
    else:
        # Instantiate the reward function; handing over the bare class would give the task a type object
        # instead of a reward function instance
        rew_fcn = MinusOnePerStepRewFcn()
    dst = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, dst, idcs)

示例#6

0

显示文件

文件： quanser_qube.py 项目： arlene-kuehn/SimuRLacra

 def _create_task(self, task_args: dict) -> Task:
     """
     Create the task including its reward function.

     :param task_args: optional overrides; the keys read here are 'state_des', 'Q', and 'R'
     :return: `RadiallySymmDesStateTask` with an `ExpQuadrErrRewFcn`
     """
     # Every entry falls back to a default if it is not provided by the caller
     goal_state = task_args.get('state_des', np.array([0., np.pi, 0., 0.]))
     q_mat = task_args.get('Q', np.diag([2e-1, 1., 2e-2, 5e-3]))
     r_mat = task_args.get('R', np.diag([3e-3]))
     rew_fcn = ExpQuadrErrRewFcn(q_mat, r_mat)

     # idcs=[1] is forwarded unchanged; presumably it marks the angular state dimension (confirm against
     # RadiallySymmDesStateTask)
     return RadiallySymmDesStateTask(self.spec, goal_state, rew_fcn, idcs=[1])

示例#7

0

显示文件

 def _create_task(self, task_args: dict) -> Task:
     """
     Create the task including its reward function, using fixed weight matrices.

     :param task_args: optional overrides; only 'state_des' is read here
     :return: `RadiallySymmDesStateTask` with an `ExpQuadrErrRewFcn`
     """
     goal_state = task_args.get('state_des', None)
     # Fall back to the default desired state if none (or an explicit None) was given
     goal_state = np.array([0., np.pi, 0., 0.]) if goal_state is None else goal_state

     rew_fcn = ExpQuadrErrRewFcn(np.diag([3., 4., 2., 2.]), np.diag([5e-2]))
     return RadiallySymmDesStateTask(self.spec, goal_state, rew_fcn, idcs=[1])

示例#8

0

显示文件

    def _create_main_task(self, task_args: dict) -> Task:
        """
        Create the main task, which masks everything but the Cartesian ball position.

        If 'sparse_rew_fcn' is set, a binary final reward task followed by an endless dummy task is returned.
        Otherwise, a desired state task with an `ExpQuadrErrRewFcn` is wrapped to add a bonus for the best state
        in the rollout.

        :param task_args: options; the keys read here are 'sparse_rew_fcn', 'success_bonus', 'Q', 'R', and
                          'final_factor'
        :return: the masked (and possibly wrapped) task
        """
        # Select the Cartesian ball position from the state
        state_dim = self.state_space.flat_dim
        ball_idcs = list(range(state_dim - 6, state_dim - 3))
        ball_space = self.spec.state_space.subspace(
            self.spec.state_space.create_mask(ball_idcs))
        ball_spec = EnvSpec(self.spec.obs_space, self.spec.act_space,
                            ball_space)

        if not task_args.get('sparse_rew_fcn', False):
            # Without a copy(), the desired state coming from MuJoCo is a reference and updates automatically
            # at each step. Note: sim.forward() + get_body_xpos() results in a wrong desired state, as sim has
            # not been updated to init_space.sample(), which is first called in reset()
            state_des = self.sim.data.get_site_xpos('cup_goal')
            if self.num_dof == 7:
                R_default = np.diag([0, 0, 1, 1e-2, 1e-2, 1e-1])
            else:
                R_default = np.diag([0, 0, 1e-2, 1e-2])
            # Q: distance ball - cup; shouldn't move in y-direction
            Q = task_args.get('Q', np.diag([2e1, 1e-4, 2e1]))
            # R: last joint is really unreliable
            R = task_args.get('R', R_default)
            dense_task = DesStateTask(ball_spec, state_des,
                                      ExpQuadrErrRewFcn(Q=Q, R=R))

            # Wrap the masked task to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(
                MaskedTask(self.spec, dense_task, ball_idcs),
                max_steps=self.max_steps,
                factor=task_args.get('final_factor', 0.05))

        bonus = task_args.get('success_bonus', 1)
        # Binary final reward task
        success_task = FinalRewTask(
            ConditionOnlyTask(ball_spec,
                              condition_fcn=self.check_ball_in_cup,
                              is_success_condition=True),
            mode=FinalRewMode(always_positive=True),
            factor=bonus)
        # Yield a negative final reward on failure after the main task is done (successfully or not)
        keep_success_task = FinalRewTask(
            GoallessTask(ball_spec, ZeroPerStepRewFcn()),
            mode=FinalRewMode(always_negative=True),
            factor=bonus)

        # Augment the binary task with an endless dummy task, to avoid early stopping
        combined = SequentialTasks((success_task, keep_success_task))
        return MaskedTask(self.spec, combined, ball_idcs)

示例#9

0

显示文件

    def _create_task(self, task_args: dict) -> Task:
        """
        Create the task, which masks everything but the Cartesian ball position.

        If 'sparse_rew_fcn' is set, a binary final reward task is returned. Otherwise, a desired state task with
        an `ExpQuadrErrRewFcn` is wrapped to add a bonus for the best state in the rollout.

        :param task_args: options; the keys read here are 'sparse_rew_fcn', 'Q', 'R', and 'final_factor'
        :return: the masked (and possibly wrapped) task
        """
        # Select the Cartesian ball position, i.e. the last three states
        state_dim = self.state_space.flat_dim
        ball_idcs = list(range(state_dim - 3, state_dim))
        ball_space = self.spec.state_space.subspace(
            self.spec.state_space.create_mask(ball_idcs))
        ball_spec = EnvSpec(self.spec.obs_space, self.spec.act_space,
                            ball_space)

        if not task_args.get('sparse_rew_fcn', False):
            # Without a copy(), the desired state coming from MuJoCo is a reference and updates automatically
            # at each step. Note: sim.forward() + get_body_xpos() results in a wrong desired state, as sim has
            # not been updated to init_space.sample(), which is first called in reset()
            state_des = self.sim.data.get_site_xpos('cup_goal')
            # Q: distance ball - cup; shouldn't move in y-direction
            Q = task_args.get('Q', np.diag([1e1, 1e5, 2e1]))
            # R: desired joint angles and velocities
            R = task_args.get('R',
                              np.diag([1e-1, 1e-1, 1e-1, 1e-2, 1e-2, 1e-2]))
            dense_task = DesStateTask(ball_spec, state_des,
                                      ExpQuadrErrRewFcn(Q=Q, R=R))

            # Wrap the masked task to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(
                MaskedTask(self.spec, dense_task, ball_idcs),
                max_steps=self.max_steps,
                factor=task_args.get('final_factor', 1.))

        # Binary final reward task
        sparse_task = FinalRewTask(
            ConditionOnlyTask(ball_spec,
                              condition_fcn=self.check_ball_in_cup,
                              is_success_condition=True),
            mode=FinalRewMode(always_positive=True),
            factor=1)
        return MaskedTask(self.spec, sparse_task, ball_idcs)

示例#10

0

显示文件

def create_extract_slider_task(env_spec: EnvSpec, task_args: dict,
                               des_state: np.ndarray):
    """
    Create a masked desired-state task that only considers the 'Slider_Y' state.

    :param env_spec: environment specification
    :param task_args: optional overrides; the keys read here are 'Q_slider' and 'R_slider'
    :param des_state: desired state handed to the `DesStateTask`
    :return: `MaskedTask` wrapping the `DesStateTask`
    """
    # The selection label needs to match the observation's name in RcsPySim
    labels = ['Slider_Y']

    # Environment specification restricted to the selected state
    selection_mask = env_spec.state_space.create_mask(labels)
    masked_spec = EnvSpec(env_spec.obs_space, env_spec.act_space,
                          env_spec.state_space.subspace(selection_mask))

    # Weight matrices, falling back to defaults if not provided by the caller
    q_mat = task_args.get('Q_slider', np.array([[4e1]]))
    r_mat = task_args.get('R_slider',
                          1e-6 * np.eye(masked_spec.act_space.flat_dim))
    inner_task = DesStateTask(masked_spec, des_state,
                              ExpQuadrErrRewFcn(q_mat, r_mat))

    # Wrap the inner task such that it only sees the selected state
    return MaskedTask(env_spec, inner_task, labels)

示例#11

0

显示文件

文件： predefined.py 项目： fdamken/SimuRLacra

def create_lifting_task(
    env_spec: EnvSpec,
    obs_labels: Sequence[str],
    des_height: Union[float, np.ndarray],
    succ_thold: float = 0.01,
) -> MaskedTask:
    """
    Build a masked desired-state task for lifting an object.

    .. note::
        Designed with an RcsPySim environment in mind, but not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['Box_Z']. They need to match the observations' names in RcsPySim
    :param des_height: desired height of the object (depends on the coordinate system). If reached, the task is over.
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successful
    :return: masked task that only considers a subspace of all observations
    """
    # Reduce the state space to the entries selected by the labels
    selection_mask = env_spec.state_space.create_mask(obs_labels)
    masked_spec = EnvSpec(env_spec.obs_space, env_spec.act_space,
                          env_spec.state_space.subspace(selection_mask))

    # The desired height is converted to an array and used as the desired state
    target = np.asarray(des_height)
    rew_fcn = ExpQuadrErrRewFcn(
        np.diag([6e2]), 1e-1 * np.eye(masked_spec.act_space.flat_dim))
    success_fcn = functools.partial(proximity_succeeded,
                                    thold_dist=succ_thold)
    inner_task = DesStateTask(masked_spec, target, rew_fcn, success_fcn)

    # Wrap the inner task such that it only sees the selected states
    return MaskedTask(env_spec, inner_task, obs_labels)

示例#12

0

显示文件

文件： planar_insert.py 项目： fdamken/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        """
        Create the task, consisting of a goal-reaching sub-task and a task space discrepancy sub-task.

        :param task_args: optional overrides; only "state_des" is read here
        :return: `ParallelTasks` holding both sub-tasks
        """
        goal_state = task_args.get("state_des", None)
        if goal_state is None:
            # Use the goal position in world coordinates; layout is X, Z, B, Xd, Zd, Bd
            goal_pos = self.get_body_position("Goal", "", "")
            goal_state = np.array([goal_pos[0], goal_pos[2], 0, 0, 0, 0])

        # Sub-task for reaching the goal; success is checked on position and angle (dims 0, 1, 2)
        reach_rew_fcn = ExpQuadrErrRewFcn(
            Q=np.diag([2e1, 2e1, 1e-1, 1e-2, 1e-2, 1e-2]),
            R=2e-2 * np.eye(self.act_space.flat_dim))
        reach_success_fcn = partial(proximity_succeeded,
                                    thold_dist=0.07,
                                    dims=[0, 1, 2])
        reach_task = create_insert_task(self.spec,
                                        goal_state,
                                        rew_fcn=reach_rew_fcn,
                                        success_fcn=reach_success_fcn)

        # Sub-task on the task space discrepancy
        discrepancy_rew_fcn = AbsErrRewFcn(q=0.1 * np.ones(2),
                                           r=np.zeros(self.act_space.shape))
        discrepancy_task = create_task_space_discrepancy_task(
            self.spec, discrepancy_rew_fcn)

        return ParallelTasks([reach_task, discrepancy_task])

示例#13

0

显示文件

    def _create_task(self, task_args: dict) -> Task:
        """
        Create the task of visiting three goals: goals 1 and 2 in parallel, then goal 3, then goals 1 and 2 in
        parallel again. A boundary check task runs in parallel on the unmasked state.

        :param task_args: options; only 'consider_velocities' (default `False`) is read here
        :return: `ParallelTasks` holding the masked goal task and the boundary check task
        """
        with_velocities = task_args.get('consider_velocities', False)

        # The selection labels need to match the observations' names in RcsPySim
        idcs = ['Effector_X', 'Effector_Z']
        if with_velocities:
            idcs = idcs + ['Effector_Xd', 'Effector_Zd']

        # Environment specification restricted to the selected states
        selection_mask = self.spec.state_space.create_mask(idcs)
        spec = EnvSpec(self.spec.obs_space, self.spec.act_space,
                       self.spec.state_space.subspace(selection_mask))

        # Goal positions in world coordinates for all three sub-goals
        p1 = self.get_body_position('Goal1', '', '')
        p2 = self.get_body_position('Goal2', '', '')
        p3 = self.get_body_position('Goal3', '', '')

        def _goal_state(pos):
            # X and Z of the goal, extended by zero desired velocities if they are considered
            if with_velocities:
                return np.array([pos[0], pos[2], 0, 0])
            return np.array([pos[0], pos[2]])

        Q = np.diag([1e0, 1e0, 1e-1, 1e-1]) if with_velocities else np.diag(
            [1e0, 1e0])
        state_des1 = _goal_state(p1)
        state_des2 = _goal_state(p2)
        state_des3 = _goal_state(p3)

        # Success is checked on the first two dims with a threshold of 7.5e-2 (originally noted as 7cm)
        success_fcn = functools.partial(proximity_succeeded,
                                        thold_dist=7.5e-2,
                                        dims=[0, 1])
        num_act = spec.act_space.flat_dim
        R = np.zeros((num_act, num_act))

        def _goal_subtask(state_des):
            # Desired state task with its own reward function and a time-dependent final reward
            return FinalRewTask(DesStateTask(spec, state_des,
                                             ExpQuadrErrRewFcn(Q, R),
                                             success_fcn),
                                mode=FinalRewMode(time_dependent=True))

        # First stage: goals 1 and 2 in parallel
        first_pair = ParallelTasks(
            [_goal_subtask(state_des1),
             _goal_subtask(state_des2)],
            hold_rew_when_done=True,
            verbose=False)
        # Second stage: goal 3
        third_goal = _goal_subtask(state_des3)
        # Third stage: goals 1 and 2 in parallel again
        second_pair = ParallelTasks(
            [_goal_subtask(state_des1),
             _goal_subtask(state_des2)],
            hold_rew_when_done=True,
            verbose=False)

        staged_task = FinalRewTask(
            SequentialTasks([first_pair, third_goal, second_pair],
                            hold_rew_when_done=True,
                            verbose=True),
            mode=FinalRewMode(always_positive=True),
            factor=2e3)
        masked_task = MaskedTask(self.spec, staged_task, idcs)

        # Additional task that ends the episode if the unmasked state is out of bounds
        bounds_task = create_check_all_boundaries_task(self.spec,
                                                       penalty=1e3)

        return ParallelTasks([masked_task, bounds_task])

示例#14

0

显示文件

    def _create_task(self, task_args: dict) -> Task:
        """
        Create the task of visiting the three goals one after another, twice in a row. A boundary check task
        and, unless this is a `Planar3LinkJointCtrlSim`, a task space discrepancy task run in parallel.

        :param task_args: options; only "consider_velocities" (default `False`) is read here
        :return: `ParallelTasks` holding the masked goal task and the additional tasks
        """
        with_velocities = task_args.get("consider_velocities", False)

        # The selection labels need to match the observations' names in RcsPySim
        idcs = ["Effector_X", "Effector_Z"]
        if with_velocities:
            idcs = idcs + ["Effector_Xd", "Effector_Zd"]

        # Environment specification restricted to the selected states
        selection_mask = self.spec.state_space.create_mask(idcs)
        spec = EnvSpec(self.spec.obs_space, self.spec.act_space,
                       self.spec.state_space.subspace(selection_mask))

        # Goal positions in world coordinates for all three sub-goals, with zero desired velocities appended
        p1 = self.get_body_position("Goal1", "", "")
        p2 = self.get_body_position("Goal2", "", "")
        p3 = self.get_body_position("Goal3", "", "")
        goal_states = [np.array([p[0], p[2], 0, 0]) for p in (p1, p2, p3)]
        if with_velocities:
            Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3])
        else:
            Q = np.diag([1e0, 1e0])
            # Only keep the positional part of the desired states
            goal_states = [goal[:2] for goal in goal_states]

        # Success is checked on the first two dims with a threshold of 7.5e-2 (originally noted as 7cm)
        success_fcn = partial(proximity_succeeded,
                              thold_dist=7.5e-2,
                              dims=[0, 1])
        num_act = spec.act_space.flat_dim
        R = np.zeros((num_act, num_act))

        def _goal_subtask(state_des):
            # Desired state task with its own reward function and a time-dependent final reward
            return FinalRewTask(DesStateTask(spec, state_des,
                                             ExpQuadrErrRewFcn(Q, R),
                                             success_fcn),
                                mode=FinalRewMode(time_dependent=True))

        # Goals 1, 2, 3 followed by goals 1, 2, 3 again
        subtasks = [_goal_subtask(goal) for goal in goal_states + goal_states]
        staged_task = FinalRewTask(
            SequentialTasks(subtasks, hold_rew_when_done=True, verbose=True),
            mode=FinalRewMode(always_positive=True),
            factor=5e3,
        )
        masked_task = MaskedTask(self.spec, staged_task, idcs)

        # Additional task that ends the episode if the unmasked state is out of bounds
        bounds_task = create_check_all_boundaries_task(self.spec,
                                                       penalty=1e3)
        parallel_tasks = [masked_task, bounds_task]
        if not isinstance(self, Planar3LinkJointCtrlSim):
            # All other variants additionally get a task on the task space discrepancy
            discrepancy_task = create_task_space_discrepancy_task(
                self.spec,
                AbsErrRewFcn(q=0.5 * np.ones(2),
                             r=np.zeros(self.act_space.shape)))
            parallel_tasks.append(discrepancy_task)

        return ParallelTasks(parallel_tasks, easily_satisfied=True)