def _normalise_error(self, angular_error: float): """ Given an angle off of a target direction in degrees, calculates a normalised error in [0,1]. The angular error is firstly transformed to interval [-180,180] to account for the fact the agent can turn left or right to face the target. :param angular_error: float, angle off target in degrees :return: float, normalised error in [0,1] """ reduced_angle_error = abs(reduce_reflex_angle_deg(angular_error)) return super()._normalise_error(reduced_angle_error)
def test_observe_first_state_correct_track_error(self): self.setUp() sim = SimStub.make_valid_state_stub(self.task) _ = self.task.observe_first_state(sim) track_deg = prp.Vector2.from_sim(sim).heading_deg() target_track_deg = sim[self.task.target_track_deg] error_deg = track_deg - target_track_deg expected_acute_error_deg = utils.reduce_reflex_angle_deg(error_deg) actual_error_deg = sim[self.task.track_error_deg] self.assertAlmostEqual(expected_acute_error_deg, actual_error_deg)
def _get_reward(self, sim: Simulation, last_state: NamedTuple, action: NamedTuple, new_state: NamedTuple) -> float: # Get negative reward proportional to normalised heading and altitude errors track_deg = prp.Vector2(last_state.velocities_v_east_fps, last_state.velocities_v_north_fps).heading_deg() normalised_error_track_deg = math.fabs(utils.reduce_reflex_angle_deg(track_deg - self.INITIAL_HEADING_DEG)) / 180.0 normalised_altitude_error = min(math.fabs(last_state.position_h_sl_ft - self.TARGET_ALTITUDE_FT) / self.INITIAL_ALTITUDE_FT, 1.0) target_reward = - normalised_error_track_deg - normalised_altitude_error # Get negative reward proportional to normalised speed angles and vertical speed normalised_angle_speed = min((math.fabs(last_state.velocities_p_rad_sec) + math.fabs(last_state.velocities_q_rad_sec) + math.fabs(last_state.velocities_r_rad_sec)) / (3*2*math.pi), 1.0) normalised_vertical_speed = min(math.fabs(last_state.velocities_v_down_fps) / self.INITIAL_ALTITUDE_FT, 1.0) stabilisation_reward = - math.exp(- sim[self.nb_episodes] / 100) * (normalised_angle_speed + normalised_vertical_speed) return target_reward + stabilisation_reward
def _update_track_error(self, sim: Simulation): v_north_fps, v_east_fps = sim[prp.v_north_fps], sim[prp.v_east_fps] track_deg = prp.Vector2(v_east_fps, v_north_fps).heading_deg() target_track_deg = sim[self.target_track_deg] error_deg = utils.reduce_reflex_angle_deg(track_deg - target_track_deg) sim[self.track_error_deg] = error_deg
def _update_heading_error(self, sim: Simulation): error_deg = utils.reduce_reflex_angle_deg( sim[self.target_heading_deg] - sim[prp.heading_deg]) sim[self.heading_error_deg] = error_deg