示例#1
0
def test_composite_task_structure(envspec_432, task_type):
    state_des1 = np.zeros(3)
    state_des2 = -.5*np.ones(3)
    state_des3 = +.5*np.ones(3)
    rew_fcn = MinusOnePerStepRewFcn()
    t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)
    t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)
    t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)

    if task_type == 'ParallelTasks':
        ct = ParallelTasks([t1, t2, t3])
    elif task_type == 'SequentialTasks':
        ct = SequentialTasks([t1, t2, t3])
    else:
        raise NotImplementedError
    ct.reset(env_spec=envspec_432)

    assert len(ct) == 3
    assert ct.env_spec.obs_space == envspec_432.obs_space
    assert ct.env_spec.act_space == envspec_432.act_space
    assert ct.env_spec.state_space == envspec_432.state_space
    assert isinstance(ct.tasks[0].rew_fcn, RewFcn)
    assert isinstance(ct.tasks[1].rew_fcn, RewFcn)
    assert isinstance(ct.tasks[2].rew_fcn, RewFcn)

    if type == 'ParallelTasks':
        assert np.all(ct.state_des[0] == state_des1)
        assert np.all(ct.state_des[1] == state_des2)
        assert np.all(ct.state_des[2] == state_des3)
    elif type == 'SequentialTasks':
        assert np.all(ct.state_des == state_des1)
示例#2
0
def test_set_goals_fo_composite_tasks(sub_tasks):
    # Check ParallelTasks
    pt = ParallelTasks(sub_tasks)
    pt.state_des = 1*[np.array([-0.2, 0.4, 0])]
    assert np.all(pt.state_des == np.array([-0.2, 0.4, 0]))
    pt.space_des = 1*[BoxSpace(-0.5, 2., shape=3)]
    assert pt.space_des[0] == BoxSpace(-0.5, 2., shape=3)  # pt.space_des is a list

    # Check SequentialTasks
    st = SequentialTasks(sub_tasks)
    st.state_des = np.array([-0.2, 0.4, 0])
    assert np.all(st.state_des == np.array([-0.2, 0.4, 0]))
    st.idx_curr = 1
    st.space_des = BoxSpace(-0.5, 2., shape=3)
    assert st.space_des == BoxSpace(-0.5, 2., shape=3)
示例#3
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        # task_box_flip = create_flipping_task(self.spec, ["Box_A"], des_angle_delta=np.pi / 2.0, endless=False)
        task_box_lift = create_lifting_task(self.spec, ["Box_Z"], des_height=0.79, succ_thold=0.05)
        task_post_lift = create_home_pos_task(
            self.spec, ["PowerGrasp_R_Y", "PowerGrasp_R_Z"], state_des=np.array([-0.1, 1.1])
        )
        tasks_box = SequentialTasks([task_box_lift, task_post_lift], hold_rew_when_done=True, verbose=True)
        task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3)
        task_force = create_forcemin_task(
            self.spec, ["WristLoadCellLBR_R_Fy", "WristLoadCellLBR_R_Fz"], Q=np.diag([1e-6, 1e-6])
        )
        # task_collision = create_collision_task(self.spec, factor=1.0)
        # task_ts_discrepancy = create_task_space_discrepancy_task(
        #     self.spec, AbsErrRewFcn(q=0.5 * np.ones(2), r=np.zeros(self.act_space.shape))
        # )

        return ParallelTasks(
            [
                tasks_box,
                task_check_bounds,
                # task_force,
                # task_collision,
                # task_ts_discrepancy
            ],
            hold_rew_when_done=False,
        )
    def _create_task(self, task_args: dict) -> Task:
        # Set up task. We track the distance to the goal for both hands separately.
        continuous_rew_fcn = task_args.get('continuous_rew_fcn', True)
        mps_left = task_args.get('mps_left')
        mps_right = task_args.get('mps_right')

        if continuous_rew_fcn:
            Q = np.diag([1, 1e-3])
            R = 1e-4 * np.eye(self.act_space.flat_dim)
            rew_fcn_factory = lambda: ExpQuadrErrRewFcn(Q, R)
        else:
            rew_fcn_factory = MinusOnePerStepRewFcn
        succ_thold = 7.5e-2

        tasks_left = [
            create_goal_dist_distvel_task(self.spec, i, rew_fcn_factory(),
                                          succ_thold)
            for i in range(len(mps_left))
        ]
        tasks_right = [
            create_goal_dist_distvel_task(self.spec, i + len(mps_left),
                                          rew_fcn_factory(), succ_thold)
            for i in range(len(mps_right))
        ]

        return ParallelTasks([
            SequentialTasks(tasks_left, hold_rew_when_done=continuous_rew_fcn),
            SequentialTasks(tasks_right,
                            hold_rew_when_done=continuous_rew_fcn),
        ],
                             hold_rew_when_done=continuous_rew_fcn)
示例#5
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        hole_pos = task_args.get("hole_pos", None)
        if hole_pos is None:
            # Get the goal position in world coordinates
            hole_pos = self.get_body_position(
                "Hole", "", "")[:2]  # x and y positions in world frame
        task_main = create_mini_golf_task(self.spec, hole_pos, succ_thold=0.05)
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)

        return ParallelTasks([task_main, task_check_bounds],
                             hold_rew_when_done=False)
示例#6
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        continuous_rew_fcn = task_args.get('continuous_rew_fcn', True)
        task_box = create_box_flip_task(self.spec, continuous_rew_fcn)
        task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3)
        # task_collision = create_collision_task(self.spec, factor=1e-2)
        # task_ts_discrepancy = create_task_space_discrepancy_task(self.spec,
        #                                                          AbsErrRewFcn(q=1e-2*np.ones(2),
        #                                                                       r=np.zeros(self.act_space.shape)))

        return ParallelTasks([
            task_box,
            task_check_bounds,
            # task_collision,
            # task_ts_discrepancy
        ], hold_rew_when_done=False)
示例#7
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        continuous_rew_fcn = task_args.get('continuous_rew_fcn', True)
        task_box = create_box_lift_task(self.spec,
                                        continuous_rew_fcn,
                                        succ_thold=0.03)
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        task_collision = create_collision_task(self.spec, factor=1.)
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=0.5 * np.ones(3), r=np.zeros(self.act_space.shape)))

        return ParallelTasks(
            [task_box, task_check_bounds, task_collision, task_ts_discrepancy],
            hold_rew_when_done=False)
示例#8
0
 def _create_task(self, task_args: dict) -> Task:
     if task_args.get('sparse_rew_fcn', False):
         # Create a task with binary reward
         return self._create_main_task(task_args)
     else:
         # Create two (or three) parallel running task.
         #   1.) Main task: Desired state task for the cartesian ball distance
         #   2.) Deviation task: Desired state task for the cartesian- and joint deviation from the init position
         #   3.) Binary Bonus: Adds a binary bonus when ball is catched [inactive by default]
         return ParallelTasks([
             self._create_main_task(task_args),
             self._create_deviation_task(task_args),
             self._create_main_task(
                 dict(sparse_rew_fcn=True,
                      success_bonus=task_args.get('success_bonus', 0)))
         ])
示例#9
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        des_state1 = self.get_body_position(
            'Goal', '', '')[:2]  # x and y coordinates in world frame
        task_box = create_extract_ball_task(self.spec, task_args, des_state1)
        des_state2 = np.array(
            self.get_body_position('Slider', '', '')[1] -
            0.5)  # y coordinate in world frame
        task_slider = create_extract_slider_task(self.spec, task_args,
                                                 des_state2)
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        task_collision = create_collision_task(self.spec, factor=5e-2)
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=0.5 * np.ones(6),
                         r=np.zeros(self.act_space.flat_dim)))

        return ParallelTasks([
            task_box, task_slider, task_check_bounds, task_collision,
            task_ts_discrepancy
        ],
                             hold_rew_when_done=False)
示例#10
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des", None)
        if state_des is None:
            # Get the goal position in world coordinates
            p = self.get_body_position("Goal", "", "")
            state_des = np.array([p[0], p[2], 0, 0, 0,
                                  0])  # X, Z, B, Xd, Zd, Bd

        # Create the individual subtasks
        task_reach_goal = create_insert_task(
            self.spec,
            state_des,
            rew_fcn=ExpQuadrErrRewFcn(
                Q=np.diag([2e1, 2e1, 1e-1, 1e-2, 1e-2, 1e-2]),
                R=2e-2 * np.eye(self.act_space.flat_dim)),
            success_fcn=partial(proximity_succeeded,
                                thold_dist=0.07,
                                dims=[0, 1, 2]),  # position and angle
        )
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=0.1 * np.ones(2), r=np.zeros(self.act_space.shape)))
        return ParallelTasks([task_reach_goal, task_ts_discrepancy])
示例#11
0
def test_parallel_task_function(envspec_432, hold_rew_when_done):
    # Create env spec and sub-tasks (state_space is necessary for the has_failed function)
    state_des1 = np.zeros(3)
    state_des2 = -.5*np.ones(3)
    state_des3 = +.5*np.ones(3)
    rew_fcn = MinusOnePerStepRewFcn()
    succ_fcn = functools.partial(proximity_succeeded, thold_dist=1e-6)  # necessary to stop a sub-task on success
    t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn, succ_fcn),
                      mode=FinalRewMode(always_positive=True), factor=10)
    t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn, succ_fcn),
                      mode=FinalRewMode(always_positive=True), factor=10)
    t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn, succ_fcn),
                      mode=FinalRewMode(always_positive=True), factor=10)

    pt = FinalRewTask(ParallelTasks([t1, t2, t3], hold_rew_when_done),
                      mode=FinalRewMode(always_positive=True), factor=100)

    # Create artificial dynamics by hard-coding a sequence of states
    num_steps = 12
    fixed_traj = np.linspace(-.5, +.6, num_steps, endpoint=True)  # for the final step, all sub-tasks would be true
    r = [-pyrado.inf]*num_steps

    for i in range(num_steps):
        # Advance the artificial state
        state = fixed_traj[i]*np.ones(3)

        # Get all sub-tasks step rew, check if they are done, and if so
        r[i] = pt.step_rew(state, act=np.zeros(2), remaining_steps=11 - i)

        # Check if reaching the sub-goals is recognized
        if np.all(state == state_des1):
            assert pt.wrapped_task.tasks[0].has_succeeded(state)
            if hold_rew_when_done:
                assert r[i] == 7  # only true for this specific setup
            else:
                assert r[i] == 8  # only true for this specific setup
        if np.all(state == state_des2):
            assert pt.wrapped_task.tasks[1].has_succeeded(state)
            if hold_rew_when_done:
                assert r[i] == 7  # only true for this specific setup
            else:
                assert r[i] == 7  # only true for this specific setup
        if np.all(state == state_des3):
            assert pt.wrapped_task.tasks[2].has_succeeded(state)
            if hold_rew_when_done:
                assert r[i] == 7  # only true for this specific setup
            else:
                assert r[i] == 9  # only true for this specific setup

        if i < 10:
            # The combined task is not successful until all sub-tasks are successful
            assert not pt.has_succeeded(state)
        elif i == 10:
            # Should succeed on the second to last
            assert pt.has_succeeded(state)
            assert pt.final_rew(state, 0) == pytest.approx(100.)
        elif i == 11:
            # The very last step reward
            if hold_rew_when_done:
                assert r[i] == -3.
            else:
                assert r[i] == 0.
            assert pt.final_rew(state, 0) == pytest.approx(0.)  # only yield once
示例#12
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get('consider_velocities', False):
            idcs = ['Effector_X', 'Effector_Z', 'Effector_Xd', 'Effector_Zd']
        else:
            idcs = ['Effector_X', 'Effector_Z']

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # Get and set goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position('Goal1', '', '')
        p2 = self.get_body_position('Goal2', '', '')
        p3 = self.get_body_position('Goal3', '', '')
        if task_args.get('consider_velocities', False):
            Q = np.diag([1e0, 1e0, 1e-1, 1e-1])
            state_des1 = np.array([p1[0], p1[2], 0, 0])
            state_des2 = np.array([p2[0], p2[2], 0, 0])
            state_des3 = np.array([p3[0], p3[2], 0, 0])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = np.array([p1[0], p1[2]])
            state_des2 = np.array([p2[0], p2[2]])
            state_des3 = np.array([p3[0], p3[2]])

        success_fcn = functools.partial(proximity_succeeded,
                                        thold_dist=7.5e-2,
                                        dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_11 = FinalRewTask(DesStateTask(spec, state_des1,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_21 = FinalRewTask(DesStateTask(spec, state_des2,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_1p = ParallelTasks([subtask_11, subtask_21],
                                   hold_rew_when_done=True,
                                   verbose=False)
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_12 = FinalRewTask(DesStateTask(spec, state_des1,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_22 = FinalRewTask(DesStateTask(spec, state_des2,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_2p = ParallelTasks([subtask_12, subtask_22],
                                   hold_rew_when_done=True,
                                   verbose=False)
        task = FinalRewTask(SequentialTasks(
            [subtask_1p, subtask_3, subtask_2p],
            hold_rew_when_done=True,
            verbose=True),
                            mode=FinalRewMode(always_positive=True),
                            factor=2e3)
        masked_task = MaskedTask(self.spec, task, idcs)

        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)

        # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
        return ParallelTasks([masked_task, task_check_bounds])
示例#13
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get("consider_velocities", False):
            idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"]
        else:
            idcs = ["Effector_X", "Effector_Z"]

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space,
            self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)),
        )

        # Get the goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position("Goal1", "", "")
        p2 = self.get_body_position("Goal2", "", "")
        p3 = self.get_body_position("Goal3", "", "")
        state_des1 = np.array([p1[0], p1[2], 0, 0])
        state_des2 = np.array([p2[0], p2[2], 0, 0])
        state_des3 = np.array([p3[0], p3[2], 0, 0])
        if task_args.get("consider_velocities", False):
            Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = state_des1[:2]
            state_des2 = state_des2[:2]
            state_des3 = state_des3[:2]

        success_fcn = partial(proximity_succeeded,
                              thold_dist=7.5e-2,
                              dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_1 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_2 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_4 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_5 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_6 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        task = FinalRewTask(
            SequentialTasks(
                [
                    subtask_1, subtask_2, subtask_3, subtask_4, subtask_5,
                    subtask_6
                ],
                hold_rew_when_done=True,
                verbose=True,
            ),
            mode=FinalRewMode(always_positive=True),
            factor=5e3,
        )
        masked_task = MaskedTask(self.spec, task, idcs)

        # Additional tasks
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        if isinstance(self, Planar3LinkJointCtrlSim):
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks([masked_task, task_check_bounds],
                                 easily_satisfied=True)
        else:
            task_ts_discrepancy = create_task_space_discrepancy_task(
                self.spec,
                AbsErrRewFcn(q=0.5 * np.ones(2),
                             r=np.zeros(self.act_space.shape)))
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks(
                [masked_task, task_check_bounds, task_ts_discrepancy],
                easily_satisfied=True)