def test_composite_task_structure(envspec_432, task_type): state_des1 = np.zeros(3) state_des2 = -.5*np.ones(3) state_des3 = +.5*np.ones(3) rew_fcn = MinusOnePerStepRewFcn() t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn), mode=FinalRewMode(always_positive=True), factor=10) t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn), mode=FinalRewMode(always_positive=True), factor=10) t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn), mode=FinalRewMode(always_positive=True), factor=10) if task_type == 'ParallelTasks': ct = ParallelTasks([t1, t2, t3]) elif task_type == 'SequentialTasks': ct = SequentialTasks([t1, t2, t3]) else: raise NotImplementedError ct.reset(env_spec=envspec_432) assert len(ct) == 3 assert ct.env_spec.obs_space == envspec_432.obs_space assert ct.env_spec.act_space == envspec_432.act_space assert ct.env_spec.state_space == envspec_432.state_space assert isinstance(ct.tasks[0].rew_fcn, RewFcn) assert isinstance(ct.tasks[1].rew_fcn, RewFcn) assert isinstance(ct.tasks[2].rew_fcn, RewFcn) if type == 'ParallelTasks': assert np.all(ct.state_des[0] == state_des1) assert np.all(ct.state_des[1] == state_des2) assert np.all(ct.state_des[2] == state_des3) elif type == 'SequentialTasks': assert np.all(ct.state_des == state_des1)
def test_set_goals_fo_composite_tasks(sub_tasks): # Check ParallelTasks pt = ParallelTasks(sub_tasks) pt.state_des = 1*[np.array([-0.2, 0.4, 0])] assert np.all(pt.state_des == np.array([-0.2, 0.4, 0])) pt.space_des = 1*[BoxSpace(-0.5, 2., shape=3)] assert pt.space_des[0] == BoxSpace(-0.5, 2., shape=3) # pt.space_des is a list # Check SequentialTasks st = SequentialTasks(sub_tasks) st.state_des = np.array([-0.2, 0.4, 0]) assert np.all(st.state_des == np.array([-0.2, 0.4, 0])) st.idx_curr = 1 st.space_des = BoxSpace(-0.5, 2., shape=3) assert st.space_des == BoxSpace(-0.5, 2., shape=3)
def _create_task(self, task_args: dict) -> Task: # Create the tasks # task_box_flip = create_flipping_task(self.spec, ["Box_A"], des_angle_delta=np.pi / 2.0, endless=False) task_box_lift = create_lifting_task(self.spec, ["Box_Z"], des_height=0.79, succ_thold=0.05) task_post_lift = create_home_pos_task( self.spec, ["PowerGrasp_R_Y", "PowerGrasp_R_Z"], state_des=np.array([-0.1, 1.1]) ) tasks_box = SequentialTasks([task_box_lift, task_post_lift], hold_rew_when_done=True, verbose=True) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) task_force = create_forcemin_task( self.spec, ["WristLoadCellLBR_R_Fy", "WristLoadCellLBR_R_Fz"], Q=np.diag([1e-6, 1e-6]) ) # task_collision = create_collision_task(self.spec, factor=1.0) # task_ts_discrepancy = create_task_space_discrepancy_task( # self.spec, AbsErrRewFcn(q=0.5 * np.ones(2), r=np.zeros(self.act_space.shape)) # ) return ParallelTasks( [ tasks_box, task_check_bounds, # task_force, # task_collision, # task_ts_discrepancy ], hold_rew_when_done=False, )
def _create_task(self, task_args: dict) -> Task: # Set up task. We track the distance to the goal for both hands separately. continuous_rew_fcn = task_args.get('continuous_rew_fcn', True) mps_left = task_args.get('mps_left') mps_right = task_args.get('mps_right') if continuous_rew_fcn: Q = np.diag([1, 1e-3]) R = 1e-4 * np.eye(self.act_space.flat_dim) rew_fcn_factory = lambda: ExpQuadrErrRewFcn(Q, R) else: rew_fcn_factory = MinusOnePerStepRewFcn succ_thold = 7.5e-2 tasks_left = [ create_goal_dist_distvel_task(self.spec, i, rew_fcn_factory(), succ_thold) for i in range(len(mps_left)) ] tasks_right = [ create_goal_dist_distvel_task(self.spec, i + len(mps_left), rew_fcn_factory(), succ_thold) for i in range(len(mps_right)) ] return ParallelTasks([ SequentialTasks(tasks_left, hold_rew_when_done=continuous_rew_fcn), SequentialTasks(tasks_right, hold_rew_when_done=continuous_rew_fcn), ], hold_rew_when_done=continuous_rew_fcn)
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function hole_pos = task_args.get("hole_pos", None) if hole_pos is None: # Get the goal position in world coordinates hole_pos = self.get_body_position( "Hole", "", "")[:2] # x and y positions in world frame task_main = create_mini_golf_task(self.spec, hole_pos, succ_thold=0.05) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) return ParallelTasks([task_main, task_check_bounds], hold_rew_when_done=False)
def _create_task(self, task_args: dict) -> Task: # Create the tasks continuous_rew_fcn = task_args.get('continuous_rew_fcn', True) task_box = create_box_flip_task(self.spec, continuous_rew_fcn) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) # task_collision = create_collision_task(self.spec, factor=1e-2) # task_ts_discrepancy = create_task_space_discrepancy_task(self.spec, # AbsErrRewFcn(q=1e-2*np.ones(2), # r=np.zeros(self.act_space.shape))) return ParallelTasks([ task_box, task_check_bounds, # task_collision, # task_ts_discrepancy ], hold_rew_when_done=False)
def _create_task(self, task_args: dict) -> Task: # Create the tasks continuous_rew_fcn = task_args.get('continuous_rew_fcn', True) task_box = create_box_lift_task(self.spec, continuous_rew_fcn, succ_thold=0.03) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) task_collision = create_collision_task(self.spec, factor=1.) task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(3), r=np.zeros(self.act_space.shape))) return ParallelTasks( [task_box, task_check_bounds, task_collision, task_ts_discrepancy], hold_rew_when_done=False)
def _create_task(self, task_args: dict) -> Task: if task_args.get('sparse_rew_fcn', False): # Create a task with binary reward return self._create_main_task(task_args) else: # Create two (or three) parallel running task. # 1.) Main task: Desired state task for the cartesian ball distance # 2.) Deviation task: Desired state task for the cartesian- and joint deviation from the init position # 3.) Binary Bonus: Adds a binary bonus when ball is catched [inactive by default] return ParallelTasks([ self._create_main_task(task_args), self._create_deviation_task(task_args), self._create_main_task( dict(sparse_rew_fcn=True, success_bonus=task_args.get('success_bonus', 0))) ])
def _create_task(self, task_args: dict) -> Task: # Create the tasks des_state1 = self.get_body_position( 'Goal', '', '')[:2] # x and y coordinates in world frame task_box = create_extract_ball_task(self.spec, task_args, des_state1) des_state2 = np.array( self.get_body_position('Slider', '', '')[1] - 0.5) # y coordinate in world frame task_slider = create_extract_slider_task(self.spec, task_args, des_state2) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) task_collision = create_collision_task(self.spec, factor=5e-2) task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(6), r=np.zeros(self.act_space.flat_dim))) return ParallelTasks([ task_box, task_slider, task_check_bounds, task_collision, task_ts_discrepancy ], hold_rew_when_done=False)
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get("state_des", None) if state_des is None: # Get the goal position in world coordinates p = self.get_body_position("Goal", "", "") state_des = np.array([p[0], p[2], 0, 0, 0, 0]) # X, Z, B, Xd, Zd, Bd # Create the individual subtasks task_reach_goal = create_insert_task( self.spec, state_des, rew_fcn=ExpQuadrErrRewFcn( Q=np.diag([2e1, 2e1, 1e-1, 1e-2, 1e-2, 1e-2]), R=2e-2 * np.eye(self.act_space.flat_dim)), success_fcn=partial(proximity_succeeded, thold_dist=0.07, dims=[0, 1, 2]), # position and angle ) task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.1 * np.ones(2), r=np.zeros(self.act_space.shape))) return ParallelTasks([task_reach_goal, task_ts_discrepancy])
def test_parallel_task_function(envspec_432, hold_rew_when_done): # Create env spec and sub-tasks (state_space is necessary for the has_failed function) state_des1 = np.zeros(3) state_des2 = -.5*np.ones(3) state_des3 = +.5*np.ones(3) rew_fcn = MinusOnePerStepRewFcn() succ_fcn = functools.partial(proximity_succeeded, thold_dist=1e-6) # necessary to stop a sub-task on success t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn, succ_fcn), mode=FinalRewMode(always_positive=True), factor=10) t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn, succ_fcn), mode=FinalRewMode(always_positive=True), factor=10) t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn, succ_fcn), mode=FinalRewMode(always_positive=True), factor=10) pt = FinalRewTask(ParallelTasks([t1, t2, t3], hold_rew_when_done), mode=FinalRewMode(always_positive=True), factor=100) # Create artificial dynamics by hard-coding a sequence of states num_steps = 12 fixed_traj = np.linspace(-.5, +.6, num_steps, endpoint=True) # for the final step, all sub-tasks would be true r = [-pyrado.inf]*num_steps for i in range(num_steps): # Advance the artificial state state = fixed_traj[i]*np.ones(3) # Get all sub-tasks step rew, check if they are done, and if so r[i] = pt.step_rew(state, act=np.zeros(2), remaining_steps=11 - i) # Check if reaching the sub-goals is recognized if np.all(state == state_des1): assert pt.wrapped_task.tasks[0].has_succeeded(state) if hold_rew_when_done: assert r[i] == 7 # only true for this specific setup else: assert r[i] == 8 # only true for this specific setup if np.all(state == state_des2): assert pt.wrapped_task.tasks[1].has_succeeded(state) if hold_rew_when_done: assert r[i] == 7 # only true for this specific setup else: assert r[i] == 7 # only true for this specific setup if np.all(state == state_des3): assert pt.wrapped_task.tasks[2].has_succeeded(state) if hold_rew_when_done: assert r[i] == 7 # only true for this specific setup else: assert r[i] == 9 # only true for this specific setup if i < 10: # The combined task is not successful until all sub-tasks are successful assert not pt.has_succeeded(state) elif i == 10: # Should succeed on the second to last assert pt.has_succeeded(state) assert pt.final_rew(state, 0) == pytest.approx(100.) elif i == 11: # The very last step reward if hold_rew_when_done: assert r[i] == -3. else: assert r[i] == 0. assert pt.final_rew(state, 0) == pytest.approx(0.) # only yield once
def _create_task(self, task_args: dict) -> Task: # Define the indices for selection. This needs to match the observations' names in RcsPySim. if task_args.get('consider_velocities', False): idcs = ['Effector_X', 'Effector_Z', 'Effector_Xd', 'Effector_Zd'] else: idcs = ['Effector_X', 'Effector_Z'] # Get the masked environment specification spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs))) # Get and set goal position in world coordinates for all three sub-goals p1 = self.get_body_position('Goal1', '', '') p2 = self.get_body_position('Goal2', '', '') p3 = self.get_body_position('Goal3', '', '') if task_args.get('consider_velocities', False): Q = np.diag([1e0, 1e0, 1e-1, 1e-1]) state_des1 = np.array([p1[0], p1[2], 0, 0]) state_des2 = np.array([p2[0], p2[2], 0, 0]) state_des3 = np.array([p3[0], p3[2], 0, 0]) else: Q = np.diag([1e0, 1e0]) state_des1 = np.array([p1[0], p1[2]]) state_des2 = np.array([p2[0], p2[2]]) state_des3 = np.array([p3[0], p3[2]]) success_fcn = functools.partial(proximity_succeeded, thold_dist=7.5e-2, dims=[0, 1]) # min distance = 7cm R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim)) # Create the tasks subtask_11 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_21 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_1p = ParallelTasks([subtask_11, subtask_21], hold_rew_when_done=True, verbose=False) subtask_3 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_12 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_22 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_2p = ParallelTasks([subtask_12, subtask_22], hold_rew_when_done=True, verbose=False) task = FinalRewTask(SequentialTasks( [subtask_1p, subtask_3, subtask_2p], hold_rew_when_done=True, verbose=True), mode=FinalRewMode(always_positive=True), factor=2e3) masked_task = MaskedTask(self.spec, task, idcs) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks([masked_task, task_check_bounds])
def _create_task(self, task_args: dict) -> Task: # Define the indices for selection. This needs to match the observations' names in RcsPySim. if task_args.get("consider_velocities", False): idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"] else: idcs = ["Effector_X", "Effector_Z"] # Get the masked environment specification spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs)), ) # Get the goal position in world coordinates for all three sub-goals p1 = self.get_body_position("Goal1", "", "") p2 = self.get_body_position("Goal2", "", "") p3 = self.get_body_position("Goal3", "", "") state_des1 = np.array([p1[0], p1[2], 0, 0]) state_des2 = np.array([p2[0], p2[2], 0, 0]) state_des3 = np.array([p3[0], p3[2], 0, 0]) if task_args.get("consider_velocities", False): Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3]) else: Q = np.diag([1e0, 1e0]) state_des1 = state_des1[:2] state_des2 = state_des2[:2] state_des3 = state_des3[:2] success_fcn = partial(proximity_succeeded, thold_dist=7.5e-2, dims=[0, 1]) # min distance = 7cm R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim)) # Create the tasks subtask_1 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_2 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_3 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_4 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_5 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_6 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) task = FinalRewTask( SequentialTasks( [ subtask_1, subtask_2, subtask_3, subtask_4, subtask_5, subtask_6 ], hold_rew_when_done=True, verbose=True, ), mode=FinalRewMode(always_positive=True), factor=5e3, ) masked_task = MaskedTask(self.spec, task, idcs) # Additional tasks task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) if isinstance(self, Planar3LinkJointCtrlSim): # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks([masked_task, task_check_bounds], easily_satisfied=True) else: task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(2), r=np.zeros(self.act_space.shape))) # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks( [masked_task, task_check_bounds, task_ts_discrepancy], easily_satisfied=True)