def test_composite_task_structure(envspec_432, task_type): state_des1 = np.zeros(3) state_des2 = -.5*np.ones(3) state_des3 = +.5*np.ones(3) rew_fcn = MinusOnePerStepRewFcn() t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn), mode=FinalRewMode(always_positive=True), factor=10) t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn), mode=FinalRewMode(always_positive=True), factor=10) t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn), mode=FinalRewMode(always_positive=True), factor=10) if task_type == 'ParallelTasks': ct = ParallelTasks([t1, t2, t3]) elif task_type == 'SequentialTasks': ct = SequentialTasks([t1, t2, t3]) else: raise NotImplementedError ct.reset(env_spec=envspec_432) assert len(ct) == 3 assert ct.env_spec.obs_space == envspec_432.obs_space assert ct.env_spec.act_space == envspec_432.act_space assert ct.env_spec.state_space == envspec_432.state_space assert isinstance(ct.tasks[0].rew_fcn, RewFcn) assert isinstance(ct.tasks[1].rew_fcn, RewFcn) assert isinstance(ct.tasks[2].rew_fcn, RewFcn) if type == 'ParallelTasks': assert np.all(ct.state_des[0] == state_des1) assert np.all(ct.state_des[1] == state_des2) assert np.all(ct.state_des[2] == state_des3) elif type == 'SequentialTasks': assert np.all(ct.state_des == state_des1)
def _create_main_task(self, task_args: dict) -> Task: # Create a DesStateTask that masks everything but the ball position idcs = list( range(self.state_space.flat_dim - 6, self.state_space.flat_dim - 3)) # Cartesian ball position spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs))) # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step. # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to # init_space.sample(), which is first called in reset() if task_args.get('sparse_rew_fcn', False): factor = task_args.get('success_bonus', 1) # Binary final reward task main_task = FinalRewTask(ConditionOnlyTask( spec, condition_fcn=self.check_ball_in_cup, is_success_condition=True), mode=FinalRewMode(always_positive=True), factor=factor) # Yield -1 on fail after the main task ist done (successfully or not) dont_fail_after_succ_task = FinalRewTask( GoallessTask(spec, ZeroPerStepRewFcn()), mode=FinalRewMode(always_negative=True), factor=factor) # Augment the binary task with an endless dummy task, to avoid early stopping task = SequentialTasks((main_task, dont_fail_after_succ_task)) return MaskedTask(self.spec, task, idcs) else: state_des = self.sim.data.get_site_xpos( 'cup_goal') # this is a reference R_default = np.diag([ 0, 0, 1, 1e-2, 1e-2, 1e-1 ]) if self.num_dof == 7 else np.diag([0, 0, 1e-2, 1e-2]) rew_fcn = ExpQuadrErrRewFcn( Q=task_args.get('Q', np.diag([ 2e1, 1e-4, 2e1 ])), # distance ball - cup; shouldn't move in y-direction R=task_args.get('R', R_default) # last joint is really unreliable ) task = DesStateTask(spec, state_des, rew_fcn) # Wrap the masked DesStateTask to add a bonus for the best state in the rollout return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs), max_steps=self.max_steps, factor=task_args.get( 'final_factor', 0.05))
def create_check_all_boundaries_task(env_spec: EnvSpec, penalty: float) -> FinalRewTask: # Check every limit (nut just of a subspace of the state state as it could happen when using a MaskedTask) return FinalRewTask(DesStateTask(env_spec, np.zeros(env_spec.state_space.shape), ZeroPerStepRewFcn(), never_succeeded), FinalRewMode(always_negative=True), factor=penalty)
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0])) return FinalRewTask( RadiallySymmDesStateTask(self.spec, state_des, UnderActuatedSwingUpRewFcn(), idcs=[1]), mode=FinalRewMode(always_negative=True), )
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get('state_des', np.zeros(2)) Q = task_args.get('Q', np.diag([1e1, 1e-2])) R = task_args.get('R', np.diag([1e-6])) return FinalRewTask( DesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R)), factor=1e3, mode=FinalRewMode(always_negative=True) )
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0])) Q = task_args.get("Q", np.diag([5e-0, 1e1, 1e-2, 1e-2])) R = task_args.get("R", np.diag([1e-3])) return FinalRewTask( RadiallySymmDesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R), idcs=[1]), mode=FinalRewMode(state_dependent=True, time_dependent=True), )
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get('state_des', None) if state_des is None: state_des = np.array([0., np.pi, 0., 0.]) return FinalRewTask(RadiallySymmDesStateTask( self.spec, state_des, UnderActuatedSwingUpRewFcn(c_act=1e-2), idcs=[1]), mode=FinalRewMode(always_negative=True))
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0])) Q = task_args.get("Q", np.diag([3e-1, 5e-1, 5e-3, 1e-3])) R = task_args.get("R", np.diag([1e-3])) rew_fcn = QuadrErrRewFcn(Q, R) return FinalRewTask( RadiallySymmDesStateTask(self.spec, state_des, rew_fcn, idcs=[1]), mode=FinalRewMode(always_negative=True), factor=1e4, )
def _create_task(self, task_args: dict) -> Task: # Create a DesStateTask that masks everything but the ball position idcs = list( range(self.state_space.flat_dim - 3, self.state_space.flat_dim)) # Cartesian ball position spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs))) # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step. # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to # init_space.sample(), which is first called in reset() if task_args.get('sparse_rew_fcn', False): # Binary final reward task task = FinalRewTask(ConditionOnlyTask( spec, condition_fcn=self.check_ball_in_cup, is_success_condition=True), mode=FinalRewMode(always_positive=True), factor=1) return MaskedTask(self.spec, task, idcs) else: # If we do not use copy(), state_des is a reference to passed body and updates automatically at each step state_des = self.sim.data.get_site_xpos( 'cup_goal') # this is a reference rew_fcn = ExpQuadrErrRewFcn( Q=task_args.get('Q', np.diag([ 1e1, 1e5, 2e1 ])), # distance ball - cup; shouldn't move in y-direction R=task_args.get('R', np.diag([ 1e-1, 1e-1, 1e-1, 1e-2, 1e-2, 1e-2 ])) # desired joint angles and velocities ) task = DesStateTask(spec, state_des, rew_fcn) # Wrap the masked DesStateTask to add a bonus for the best state in the rollout return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs), max_steps=self.max_steps, factor=task_args.get( 'final_factor', 1.))
def create_check_all_boundaries_task(env_spec: EnvSpec, penalty: float) -> FinalRewTask: """ Create a task that is checking if any of the state space bounds is violated. This checks every limit and not just of a subspace of the state state as it could happen when using a `MaskedTask`. .. note:: This task was designed with an RcsPySim environment in mind, but is not restricted to these environments. :param env_spec: environment specification :param penalty: scalar cost (positive values) for violating the bounds :return: masked task that only considers a subspace of all observations """ return FinalRewTask( DesStateTask(env_spec, np.zeros(env_spec.state_space.shape), ZeroPerStepRewFcn(), never_succeeded), FinalRewMode(always_negative=True), factor=penalty, )
def create_insert_task(env_spec: EnvSpec, state_des: np.ndarray, rew_fcn: RewFcn, success_fcn: Callable): # Define the indices for selection. This needs to match the observations' names in RcsPySim. idcs = [ 'Effector_X', 'Effector_Z', 'Effector_B', 'Effector_Xd', 'Effector_Zd', 'Effector_Bd' ] # Get the masked environment specification spec = EnvSpec( env_spec.obs_space, env_spec.act_space, env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))) # Create a wrapped desired state task with the goal behind the wall fdst = FinalRewTask(DesStateTask(spec, state_des, rew_fcn, success_fcn), mode=FinalRewMode(state_dependent=True, time_dependent=True)) # Mask selected states return MaskedTask(env_spec, fdst, idcs)
def create_mini_golf_task(env_spec: EnvSpec, hole_pos: np.ndarray, succ_thold: float): """ Create a task for putting the ball into a whole. .. note:: This task was designed with an RcsPySim environment in mind, but is not restricted to these environments. :param env_spec: environment specification :param hole_pos: planar x and yy position of the goal's center :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully :return: masked task that only considers a subspace of all observations """ if not hole_pos.size == 2: raise pyrado.ShapeErr(given=hole_pos, expected_match=(2, )) # Define the indices for selection. This needs to match the observations' names in RcsPySim. idcs = ["Ball_X", "Ball_Y"] # Get the masked environment specification spec = EnvSpec( env_spec.obs_space, env_spec.act_space, env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))) # Create a desired state task dst = DesStateTask( spec, state_des=hole_pos, rew_fcn=AbsErrRewFcn(q=np.ones(2), r=1e-4 * np.ones(spec.act_space.shape)), success_fcn=functools.partial(proximity_succeeded, thold_dist=succ_thold), ) frt = FinalRewTask(dst, FinalRewMode(always_positive=True)) # Return the masked tasks return MaskedTask(env_spec, frt, idcs)
def test_sequential_task_function(envspec_432, hold_rew_when_done): # Create env spec and sub-tasks (state_space is necessary for the has_failed function) state_des1 = -.5*np.ones(3) state_des2 = np.zeros(3) state_des3 = +.5*np.ones(3) rew_fcn = MinusOnePerStepRewFcn() succ_fcn = functools.partial(proximity_succeeded, thold_dist=1e-6) # necessary to stop a sub-task on success t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn, succ_fcn), mode=FinalRewMode(always_positive=True), factor=10) t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn, succ_fcn), mode=FinalRewMode(always_positive=True), factor=10) t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn, succ_fcn), mode=FinalRewMode(always_positive=True), factor=10) st = FinalRewTask(SequentialTasks([t1, t2, t3], 0, hold_rew_when_done), mode=FinalRewMode(always_positive=True), factor=100) # Create artificial dynamics by hard-coding a sequence of states num_steps = 12 fixed_traj = np.linspace(-.5, +.6, num_steps, endpoint=True) # for the final step, all sub-tasks would be true r = [-pyrado.inf]*num_steps for i in range(num_steps): # Advance the artificial state state = fixed_traj[i]*np.ones(3) # Get all sub-tasks step rew, check if they are done, and if so r[i] = st.step_rew(state, act=np.zeros(2), remaining_steps=num_steps - (i + 1)) # Check if reaching the sub-goals is recognized if np.all(state == state_des1): assert st.wrapped_task.tasks[0].has_succeeded(state) if hold_rew_when_done: assert r[i] == 9 # only true for this specific setup else: assert r[i] == 9 # only true for this specific setup if np.all(state == state_des2): assert st.wrapped_task.tasks[1].has_succeeded(state) if hold_rew_when_done: assert r[i] == 8 # only true for this specific setup else: assert r[i] == 9 # only true for this specific setup if np.all(state == state_des3): assert st.wrapped_task.tasks[2].has_succeeded(state) if hold_rew_when_done: assert r[i] == 7 # only true for this specific setup else: assert r[i] == 9 # only true for this specific setup if i < 10: # The combined task is not successful until all sub-tasks are successful assert not st.has_succeeded(state) elif i == 10: # Should succeed on the second to last assert st.has_succeeded(state) assert st.final_rew(state, 0) == pytest.approx(100.) elif i == 11: # The very last step reward if hold_rew_when_done: assert r[i] == -3. else: assert r[i] == 0. assert st.final_rew(state, 0) == pytest.approx(0.) # only yield once
def _create_task(self, task_args: dict) -> Task: # Define the indices for selection. This needs to match the observations' names in RcsPySim. if task_args.get('consider_velocities', False): idcs = ['Effector_X', 'Effector_Z', 'Effector_Xd', 'Effector_Zd'] else: idcs = ['Effector_X', 'Effector_Z'] # Get the masked environment specification spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs))) # Get and set goal position in world coordinates for all three sub-goals p1 = self.get_body_position('Goal1', '', '') p2 = self.get_body_position('Goal2', '', '') p3 = self.get_body_position('Goal3', '', '') if task_args.get('consider_velocities', False): Q = np.diag([1e0, 1e0, 1e-1, 1e-1]) state_des1 = np.array([p1[0], p1[2], 0, 0]) state_des2 = np.array([p2[0], p2[2], 0, 0]) state_des3 = np.array([p3[0], p3[2], 0, 0]) else: Q = np.diag([1e0, 1e0]) state_des1 = np.array([p1[0], p1[2]]) state_des2 = np.array([p2[0], p2[2]]) state_des3 = np.array([p3[0], p3[2]]) success_fcn = functools.partial(proximity_succeeded, thold_dist=7.5e-2, dims=[0, 1]) # min distance = 7cm R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim)) # Create the tasks subtask_11 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_21 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_1p = ParallelTasks([subtask_11, subtask_21], hold_rew_when_done=True, verbose=False) subtask_3 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_12 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_22 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_2p = ParallelTasks([subtask_12, subtask_22], hold_rew_when_done=True, verbose=False) task = FinalRewTask(SequentialTasks( [subtask_1p, subtask_3, subtask_2p], hold_rew_when_done=True, verbose=True), mode=FinalRewMode(always_positive=True), factor=2e3) masked_task = MaskedTask(self.spec, task, idcs) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks([masked_task, task_check_bounds])
def _create_task(self, task_args: dict) -> Task: # The wrapped task acts as a dummy and carries the FinalRewTask return FinalRewTask(GoallessTask(self.spec, ZeroPerStepRewFcn()), mode=FinalRewMode(user_input=True))
def _create_task(self, task_args: dict) -> Task: # Define the indices for selection. This needs to match the observations' names in RcsPySim. if task_args.get("consider_velocities", False): idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"] else: idcs = ["Effector_X", "Effector_Z"] # Get the masked environment specification spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs)), ) # Get the goal position in world coordinates for all three sub-goals p1 = self.get_body_position("Goal1", "", "") p2 = self.get_body_position("Goal2", "", "") p3 = self.get_body_position("Goal3", "", "") state_des1 = np.array([p1[0], p1[2], 0, 0]) state_des2 = np.array([p2[0], p2[2], 0, 0]) state_des3 = np.array([p3[0], p3[2], 0, 0]) if task_args.get("consider_velocities", False): Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3]) else: Q = np.diag([1e0, 1e0]) state_des1 = state_des1[:2] state_des2 = state_des2[:2] state_des3 = state_des3[:2] success_fcn = partial(proximity_succeeded, thold_dist=7.5e-2, dims=[0, 1]) # min distance = 7cm R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim)) # Create the tasks subtask_1 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_2 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_3 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_4 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_5 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_6 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) task = FinalRewTask( SequentialTasks( [ subtask_1, subtask_2, subtask_3, subtask_4, subtask_5, subtask_6 ], hold_rew_when_done=True, verbose=True, ), mode=FinalRewMode(always_positive=True), factor=5e3, ) masked_task = MaskedTask(self.spec, task, idcs) # Additional tasks task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) if isinstance(self, Planar3LinkJointCtrlSim): # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks([masked_task, task_check_bounds], easily_satisfied=True) else: task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(2), r=np.zeros(self.act_space.shape))) # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks( [masked_task, task_check_bounds, task_ts_discrepancy], easily_satisfied=True)