示例#1
0
    def _select_assessor(self, base_components: Tuple[rewards.RewardComponent, ...],
                         shaping_components: Tuple[rewards.RewardComponent, ...],
                         shaping: Shaping) -> assessors.AssessorImpl:
        """Build the reward assessor matching the requested shaping mode.

        :param base_components: the base reward components; for
            ``Shaping.EXTRA_SEQUENTIAL`` this must unpack into exactly two
            items, the second being the travel-direction component.
        :param shaping_components: shaping components, used only when
            ``shaping`` is ``Shaping.STANDARD``. The other modes replace them
            with a single potential-based ``no_sideslip`` component.
        :param shaping: the shaping mode selecting which assessor to build.
        :return: an ``AssessorImpl`` for ``STANDARD`` and ``EXTRA``, or a
            ``ContinuousSequentialAssessor`` for ``EXTRA_SEQUENTIAL``.
        :raises ValueError: if ``shaping`` is not one of the handled modes, or
            if ``base_components`` does not hold exactly two items in the
            ``EXTRA_SEQUENTIAL`` case.
        """
        if shaping is Shaping.STANDARD:
            return assessors.AssessorImpl(base_components, shaping_components,
                                          positive_rewards=self.positive_rewards)

        no_sideslip = rewards.AsymptoticErrorComponent(name='no_sideslip',
                                                       prop=prp.sideslip_deg,
                                                       state_variables=self.state_variables,
                                                       target=0.0,
                                                       is_potential_based=True,
                                                       scaling_factor=self.SIDESLIP_ERROR_SCALING_DEG)
        # The trailing comma matters: ``(no_sideslip)`` is only a parenthesised
        # expression, whereas the assessors are handed tuples of components.
        potential_based_components = (no_sideslip,)

        if shaping is Shaping.EXTRA:
            return assessors.AssessorImpl(base_components, potential_based_components,
                                          positive_rewards=self.positive_rewards)
        if shaping is Shaping.EXTRA_SEQUENTIAL:
            # only the second base component (travel direction) is needed here
            _, travel_direction = base_components
            # make the no_sideslip shaping reward dependent on facing the correct direction
            dependency_map = {no_sideslip: (travel_direction,)}
            return assessors.ContinuousSequentialAssessor(base_components, potential_based_components,
                                                          potential_dependency_map=dependency_map,
                                                          positive_rewards=self.positive_rewards)

        # Fail loudly rather than implicitly returning None for an unknown mode.
        raise ValueError('unsupported shaping mode: {!r}'.format(shaping))
示例#2
0
 def _make_base_reward_components(self) -> Tuple[rewards.RewardComponent, ...]:
     """Create the two base (non-potential-based) reward components.

     Both are ``AsymptoticErrorComponent`` instances with a target of 0.0,
     built from this task's ``state_variables``.

     :return: a 2-tuple ordered as (altitude error, travel direction).
     """
     altitude_term = rewards.AsymptoticErrorComponent(
         name='altitude_error',
         prop=self.altitude_error_ft,
         state_variables=self.state_variables,
         target=0.0,
         is_potential_based=False,
         scaling_factor=self.ALTITUDE_SCALING_FT,
     )
     direction_term = rewards.AsymptoticErrorComponent(
         name='travel_direction',
         prop=self.track_error_deg,
         state_variables=self.state_variables,
         target=0.0,
         is_potential_based=False,
         scaling_factor=self.TRACK_ERROR_SCALING_DEG,
     )
     # add an airspeed error relative to cruise speed component?
     return altitude_term, direction_term