Пример #1
0
    def _create_task(self, task_args: dict) -> Task:
        """
        Build the task, including its reward function.

        :param task_args: mapping of optional overrides; the keys `state_des`, `Q`, and `R` are looked up and each
                          falls back to the default given below when the key is missing
        :return: a `RadiallySymmDesStateTask` using an `ExpQuadrErrRewFcn(Q, R)` reward and `idcs=[1]`
        """
        # Fallback values, used only for keys that the caller did not provide
        default_state_des = np.array([0.0, np.pi, 0.0, 0.0])
        default_Q = np.diag([3.0, 4.0, 2.0, 2.0])
        default_R = np.diag([5e-2])

        state_des = task_args.get("state_des", default_state_des)
        rew_fcn = ExpQuadrErrRewFcn(
            task_args.get("Q", default_Q),
            task_args.get("R", default_R),
        )

        # NOTE(review): index 1 is presumably the periodic (angular) state entry -- confirm against the task class
        return RadiallySymmDesStateTask(self.spec, state_des, rew_fcn, idcs=[1])
Пример #2
0
    def _create_task(self, task_args: dict) -> Task:
        """
        Build the task, including its reward function.

        :param task_args: mapping of optional overrides; only the key `state_des` is read here
        :return: a `FinalRewTask` wrapping a `RadiallySymmDesStateTask` that uses `UnderActuatedSwingUpRewFcn()`
        """
        # Fall back to the default desired state if the caller did not specify one
        fallback_state_des = np.array([0.0, np.pi, 0.0, 0.0])
        state_des = task_args.get("state_des", fallback_state_des)

        # Inner task providing the step reward
        inner_task = RadiallySymmDesStateTask(
            self.spec,
            state_des,
            UnderActuatedSwingUpRewFcn(),
            idcs=[1],
        )

        # Wrap the inner task so that a final reward is added according to the given mode
        final_mode = FinalRewMode(always_negative=True)
        return FinalRewTask(inner_task, mode=final_mode)
Пример #3
0
 def _create_task(self, task_args: dict) -> Task:
     """
     Build the task, including its reward function.

     :param task_args: mapping of optional overrides; the keys `state_des`, `Q`, and `R` are looked up and each
                       falls back to the default given below when the key is missing
     :return: a `RadiallySymmDesStateTask` using an `ExpQuadrErrRewFcn(Q, R)` reward and `idcs=[1]`
     """
     # Desired state (caller-supplied or default)
     state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))

     # Weight matrices handed to the reward function (caller-supplied or default)
     Q = task_args.get("Q", np.diag([2e-1, 1.0, 2e-2, 5e-3]))
     R = task_args.get("R", np.diag([3e-3]))
     rew_fcn = ExpQuadrErrRewFcn(Q, R)

     return RadiallySymmDesStateTask(self.spec, state_des, rew_fcn, idcs=[1])
Пример #4
0
    def _create_task(self, task_args: dict) -> Task:
        """
        Build the task, including its reward function.

        :param task_args: mapping of optional overrides; the keys `state_des`, `Q`, and `R` are looked up and each
                          falls back to the default given below when the key is missing
        :return: a `FinalRewTask` wrapping a `RadiallySymmDesStateTask` that uses a `QuadrErrRewFcn(Q, R)` reward
        """
        state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))

        # Weight matrices handed to the reward function (caller-supplied or default)
        Q = task_args.get("Q", np.diag([5.0, 10.0, 1e-2, 1e-2]))
        R = task_args.get("R", np.diag([1e-3]))

        # Task providing the step reward
        wrapped_task = RadiallySymmDesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R), idcs=[1])

        # Final reward configured with both the state-dependent and the time-dependent flag enabled
        final_mode = FinalRewMode(state_dependent=True, time_dependent=True)
        return FinalRewTask(wrapped_task, mode=final_mode)
Пример #5
0
 def _create_task(self, task_args: dict) -> Task:
     """
     Build the task, including its reward function.

     :param task_args: mapping of optional overrides; only the key `state_des` is read here, a missing key or an
                       explicit `None` both select the default desired state
     :return: a `RadiallySymmDesStateTask` using an `ExpQuadrErrRewFcn(Q, R)` reward and `idcs=[1]`
     """
     # `dict.get` yields None for a missing key, so absent and explicit-None are handled identically
     target = task_args.get("state_des")
     if target is None:
         target = np.array([0.0, np.pi, 0.0, 0.0])

     # The weight matrices are fixed in this variant, i.e. they are not read from task_args
     rew_fcn = ExpQuadrErrRewFcn(np.diag([3.0, 4.0, 2.0, 2.0]), np.diag([5e-2]))

     return RadiallySymmDesStateTask(self.spec, target, rew_fcn, idcs=[1])
Пример #6
0
    def _create_task(self, task_args: dict) -> Task:
        """
        Build the task, including its reward function.

        :param task_args: mapping of optional overrides; only the key `state_des` is read here, a missing key or an
                          explicit `None` both select the default desired state
        :return: a `FinalRewTask` wrapping a `RadiallySymmDesStateTask` that uses
                 `UnderActuatedSwingUpRewFcn(c_act=1e-2)`
        """
        # `dict.get` yields None for a missing key, so absent and explicit-None are handled identically
        target = task_args.get("state_des")
        if target is None:
            target = np.array([0.0, np.pi, 0.0, 0.0])

        # Inner task providing the step reward
        swing_up_task = RadiallySymmDesStateTask(
            self.spec,
            target,
            UnderActuatedSwingUpRewFcn(c_act=1e-2),
            idcs=[1],
        )

        # Wrap the inner task so that a final reward is added according to the given mode
        return FinalRewTask(swing_up_task, mode=FinalRewMode(always_negative=True))
Пример #7
0
    def _create_task(self, task_args: dict) -> Task:
        """
        Build the task, including its reward function.

        :param task_args: mapping of optional overrides; the keys `state_des`, `Q`, and `R` are looked up and each
                          falls back to the default given below when the key is missing
        :return: a `FinalRewTask` (created with `factor=1e4`) wrapping a `RadiallySymmDesStateTask` that uses a
                 `QuadrErrRewFcn(Q, R)` reward
        """
        default_state_des = np.array([0.0, np.pi, 0.0, 0.0])
        state_des = task_args.get("state_des", default_state_des)

        # Weight matrices handed to the reward function (caller-supplied or default)
        Q = task_args.get("Q", np.diag([3e-1, 5e-1, 5e-3, 1e-3]))
        R = task_args.get("R", np.diag([1e-3]))

        # Task providing the step reward
        step_task = RadiallySymmDesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R), idcs=[1])

        final_mode = FinalRewMode(always_negative=True)
        return FinalRewTask(step_task, mode=final_mode, factor=1e4)
Пример #8
0
def test_modulated_rew_fcn():
    """
    Check the step reward of a `RadiallySymmDesStateTask` when selected state entries are taken modulo a factor.

    Two set-ups are covered: one common modulo factor for all selected indices, and one factor per selected index.
    The desired state is all zeros and the action is all zeros, so with identity weights the expected reward is the
    negative sum of the squared (modulated) state entries.
    """
    state = np.array([1, 2, 3, 4])
    act = np.array([0, 0])
    quadr_rew_fcn = QuadrErrRewFcn(np.eye(4), np.eye(2))

    # Case 1: modulo 2 for all selected states, i.e. [1, 2, 3, 4] -> [1, 0, 3, 0]
    common_modulo_task = RadiallySymmDesStateTask(
        EnvSpec(None, None, None), np.zeros(4), quadr_rew_fcn, [0, 1, 3], 2
    )
    expected_common = -(1**2 + 3**2)
    assert common_modulo_task.step_rew(state, act) == expected_common

    # Case 2: different modulo factor for the selected states, i.e. [1, 2, 3, 4] -> [1, 0, 3, 1]
    per_idx_modulo_task = RadiallySymmDesStateTask(
        EnvSpec(None, None, None), np.zeros(4), quadr_rew_fcn, [1, 3], np.array([2, 3])
    )
    expected_per_idx = -(1**2 + 3**2 + 1**2)
    assert per_idx_modulo_task.step_rew(state, act) == expected_per_idx