def _select_assessor(self, base_components: Tuple[rewards.RewardComponent, ...],
                     shaping_components: Tuple[rewards.RewardComponent, ...],
                     shaping: Shaping) -> assessors.AssessorImpl:
    """Build the reward assessor matching the requested shaping mode.

    :param base_components: the base reward components; for
        Shaping.EXTRA_SEQUENTIAL this must unpack into exactly two items,
        the second of which is treated as the travel-direction component
    :param shaping_components: shaping components, used only for
        Shaping.STANDARD
    :param shaping: the Shaping mode selecting which assessor is built
    :return: an assessors.AssessorImpl for STANDARD and EXTRA, or an
        assessors.ContinuousSequentialAssessor for EXTRA_SEQUENTIAL
    """
    if shaping is Shaping.STANDARD:
        return assessors.AssessorImpl(base_components, shaping_components,
                                      positive_rewards=self.positive_rewards)

    # Every non-standard mode replaces the caller's shaping components with
    # a single potential-based sideslip term.
    no_sideslip = rewards.AsymptoticErrorComponent(
        name='no_sideslip',
        prop=prp.sideslip_deg,
        state_variables=self.state_variables,
        target=0.0,
        is_potential_based=True,
        scaling_factor=self.SIDESLIP_ERROR_SCALING_DEG)
    # The trailing comma matters: `(no_sideslip)` is just the component
    # itself, whereas the assessors are handed a tuple of components in the
    # STANDARD branch above.
    potential_based_components = (no_sideslip,)

    if shaping is Shaping.EXTRA:
        return assessors.AssessorImpl(base_components, potential_based_components,
                                      positive_rewards=self.positive_rewards)
    elif shaping is Shaping.EXTRA_SEQUENTIAL:
        # Unpacking also enforces that exactly two base components were given.
        _altitude_error, travel_direction = base_components
        # make the no_sideslip shaping reward dependent on facing the correct direction
        dependency_map = {no_sideslip: (travel_direction,)}
        return assessors.ContinuousSequentialAssessor(base_components,
                                                      potential_based_components,
                                                      potential_dependency_map=dependency_map,
                                                      positive_rewards=self.positive_rewards)
    # NOTE(review): any other Shaping value falls through here and the method
    # implicitly returns None, which contradicts the return annotation --
    # confirm whether this should raise instead.
def _make_base_reward_components(self) -> Tuple[rewards.RewardComponent, ...]:
    """Create the base reward components for this task.

    Two asymptotic error components are built, each with a target of 0.0
    and flagged as not potential-based:

    * 'altitude_error', driven by self.altitude_error_ft
    * 'travel_direction', driven by self.track_error_deg

    :return: a 2-tuple ordered (altitude_error, travel_direction)
    """
    altitude_term = rewards.AsymptoticErrorComponent(
        name='altitude_error',
        prop=self.altitude_error_ft,
        state_variables=self.state_variables,
        target=0.0,
        is_potential_based=False,
        scaling_factor=self.ALTITUDE_SCALING_FT,
    )
    direction_term = rewards.AsymptoticErrorComponent(
        name='travel_direction',
        prop=self.track_error_deg,
        state_variables=self.state_variables,
        target=0.0,
        is_potential_based=False,
        scaling_factor=self.TRACK_ERROR_SCALING_DEG,
    )
    # add an airspeed error relative to cruise speed component?
    return altitude_term, direction_term