示例#1
0
    def __init__(self, stepweite=0.01):
        super(WrapperOpenAI, self).__init__()
        # Define action and observation space
        # They must be gym.spaces objects
        # nur aileron
        high_action_space = np.array([1.], dtype=np.float32)
        self.action_space = spaces.Box(low=-high_action_space,
                                       high=high_action_space,
                                       dtype=np.float32)
        # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error
        high_observation_space = np.array([np.inf, np.inf, np.inf],
                                          dtype=np.float32)
        self.observation_space = spaces.Box(low=-high_observation_space,
                                            high=high_observation_space,
                                            dtype=np.float32)
        # reward
        self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32)
        # spezielle Parameter für das Enviroment FDM
        self.aircraft = Aircraft_baever()  # Ball oder C172
        self.pid = PidRegler()
        self.dynamicSystem = DynamicSystem6DoF()
        self.stepweite = stepweite
        self.udpClient = UdpClient('127.0.0.1', 5566)
        # frage: ist das die richtige Stelle, oder besser im DDPG-Controller
        self.targetValues = {
            'targetPhi_grad': 0,
            'targetTheta_grad': 0,
            'targetPsi': 0,
            'targetSpeed': 0,
            'target_z_dot': 0.0
        }
        self.envelopeBounds = {
            'phiMax_grad': 30,
            'phiMin_grad': -30,
            'thetaMax_grad': 30,
            'thetaMin_grad': -30,
            'speedMax': 72,
            'speedMin': 33
        }

        # fuer plotten
        self.plotter = PlotState()
        self.anzahlSteps = 0
        self.anzahlEpisoden = 0

        self.observationErrorAkkumulation = np.zeros(3)
        self.integration_error_stepsize_ = 0

        self.servo_command = 0
        self.action_servo_command_history = np.zeros(2)
        self.bandbreite_servo_actions = 0
示例#2
0
    def __init__(self, stepweite=0.05):
        super(WrapperOpenAI, self).__init__()
        # Define action and observation space
        # They must be gym.spaces objects
        # nur aileron
        high_action_space = np.array([1.], dtype=np.float32)
        self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32)
        # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error
        high_observation_space = np.array([np.inf, np.inf, np.inf, np.inf, np.inf], dtype=np.float32)
        self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32)
        # reward
        self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32)
        # spezielle Parameter für das Enviroment FDM
        #self.aircraft = Aircraft(config.geometrieClass)  # Ball oder C172
        self.aircraft_beaver = Aircraft_baever()
        self.pid = PidRegler()
        self.dynamicSystem = DynamicSystem6DoF()
        self.umrechnungenKoordinaten = UmrechnungenKoordinaten()
        self.stepweite = stepweite
        self.udpClient = UdpClient('127.0.0.1', 5566)
        # frage: ist das die richtige Stelle, oder besser im DDPG-Controller
        self.targetValues = {'targetPhi': 0,
                             'targetTheta': 0,
                             'targetPsi': 0,
                             'targetSpeed': 0,
                             'target_z_dot': 0.0}
        self.envelopeBounds = {'phiMax': 20,
                               'phiMin': -20,
                               'thetaMax': 10,
                               'thetaMin': -30,
                               'speedMax': 80,
                               'speedMin': 33
                               }

        self.integration_stepsize = 3
        self.error_array_u = np.zeros(self.integration_stepsize)
        self.integration_error_u = 0
        self.error_array_theta = np.zeros(self.integration_stepsize)
        self.integration_error_theta = 0

        # fuer plotten
        self.plotter = PlotState()
        self.anzahlSteps = 0
        self.anzahlEpisoden = 0
class WrapperOpenAI(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, stepweite=0.01):
        super(WrapperOpenAI, self).__init__()
        # Define action and observation space
        # They must be gym.spaces objects
        # nur aileron
        high_action_space = np.array([1.], dtype=np.float32)
        self.action_space = spaces.Box(low=-high_action_space,
                                       high=high_action_space,
                                       dtype=np.float32)
        # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error
        high_observation_space = np.array(
            [np.inf, np.inf, np.inf, np.inf, np.inf], dtype=np.float32)
        self.observation_space = spaces.Box(low=-high_observation_space,
                                            high=high_observation_space,
                                            dtype=np.float32)
        # reward
        self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32)
        # spezielle Parameter für das Enviroment FDM
        self.aircraft = Aircraft(config.geometrieClass)  # Ball oder C172
        self.pid = PidRegler()
        self.dynamicSystem = DynamicSystem6DoF()
        self.stepweite = stepweite
        self.udpClient = UdpClient('127.0.0.1', 5566)
        # frage: ist das die richtige Stelle, oder besser im DDPG-Controller
        self.targetValues = {
            'targetPhi_grad': 0,
            'targetTheta_grad': 0,
            'targetPsi': 0,
            'targetSpeed': 0,
            'target_z_dot': 0.0
        }
        self.envelopeBounds = {
            'phiMax': 20,
            'phiMin': -20,
            'thetaMax_grad': 30,
            'thetaMin_grad': -30,
            'speedMax': 72,
            'speedMin': 33
        }

        self.observationErrorAkkumulation = np.zeros(3)
        self.integration_error_stepsize_ = 0
        # fuer plotten
        self.plotter = PlotState()
        self.anzahlSteps = 0
        self.anzahlEpisoden = 0

        self.servo_command = 0
        self.action_servo_command_history = np.zeros(2)
        self.bandbreite_servo_actions = 0

    def reset(self):
        self.observationErrorAkkumulation = np.zeros(3)
        self.integration_error_stepsize_ = 0

        self.servo_command = 0
        self.action_servo_command_history = np.zeros(2)
        self.bandbreite_servo_actions = 0

        self.anzahlSteps = 1
        self.anzahlEpisoden += 1

        # set targets for different angles
        self.targetValues['targetTheta'] = np.random.uniform(-5, 5)
        print('new Target (deg): ', self.targetValues)

        # set state at initial
        phi_as_random = np.deg2rad(np.random.uniform(0, 0))
        theta_as_random = np.deg2rad(np.random.uniform(-3, 3))

        self.aircraft.setState(
            np.array([
                40.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, phi_as_random,
                theta_as_random, 0.0
            ]))
        observation = self.user_defined_observation(self.aircraft.getState())
        return observation  # reward, done, info can't be included

    def step(self, actionElevator):
        self.anzahlSteps += 1
        # action im Intervall [-1,1]
        # mapping auf Begrenzung der Steuerflächen
        self.servo_command = actionElevator[0]
        set_elevator = np.interp(
            actionElevator[0], [-1, 1],
            self.aircraft.SteuerflaechenUndMotorStellung.
            controlLimits['deltaElevatorBorder'])
        # print(set_aileron)
        self.aircraft.SteuerflaechenUndMotorStellung.deltaElevator = set_elevator
        # Headline: phi wird mit PID-Regler stabilisiert
        set_aileron = self.pid._innerLoopAileron(
            0, np.rad2deg(self.aircraft.phi), self.aircraft.p,
            self.aircraft.SteuerflaechenUndMotorStellung.deltaAileron)
        self.aircraft.SteuerflaechenUndMotorStellung.deltaAileron = set_aileron
        # Headline: integrate step
        solver = self.dynamicSystem.integrate(
            self.aircraft.getState(), self.aircraft.getForces(),
            self.aircraft.getMoments(), self.aircraft.mass,
            self.aircraft.inertia, self.stepweite
        )  # def integrate(self, state, mass, inertia, forces, moments, stepweite):
        # State1
        self.aircraft.setState(
            np.array([
                solver.y[0][0], solver.y[1][0], solver.y[2][0], solver.y[3][0],
                solver.y[4][0], solver.y[5][0], solver.y[6][0], solver.y[7][0],
                solver.y[8][0], solver.y[9][0], solver.y[10][0],
                solver.y[11][0]
            ]))
        observation = self.user_defined_observation(self.aircraft.getState())
        reward = self.compute_reward(self.aircraft.getState())
        done = self.check_done(self.aircraft.getState())
        # Headline: ab hier für plotten
        self.plotter.addData(
            self.aircraft.getState(), self.aircraft.getForces(),
            self.aircraft.getMoments(), self.aircraft.alpha,
            self.aircraft.beta,
            np.rad2deg(self.aircraft.SteuerflaechenUndMotorStellung.
                       getSteuerflaechenUndMotorStellung()), self.anzahlSteps +
            self.anzahlEpisoden * 400)  #Headline ist anzupassen

        return observation, reward, done, {}

    def render(self, mode='human'):
        self.udpClient.send((struct.pack('fff', self.aircraft.phi, 0, 0)))

    def close(self):
        pass

    def seed(self, seed=None):
        return

    def user_defined_observation(self, observation):
        # return: current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error
        current_theta_dot = observation[7]
        current_theta = observation[10]
        error_current_theta_to_target_theta = np.abs(
            np.rad2deg(observation[10]) - self.targetValues['targetTheta'])

        self.observationErrorAkkumulation = np.roll(
            self.observationErrorAkkumulation, 1)
        self.observationErrorAkkumulation[
            -1] = error_current_theta_to_target_theta
        self.integration_error_stepsize_ = np.add.reduce(
            self.observationErrorAkkumulation)

        self.action_servo_command_history = np.roll(
            self.action_servo_command_history,
            len(self.action_servo_command_history) - 1)
        self.action_servo_command_history[-1] = self.servo_command
        self.bandbreite_servo_actions = np.abs(
            np.min(self.action_servo_command_history) -
            np.max(self.action_servo_command_history))

        # enrichment of Observation: akkumulated error -> 1. rotate and add element 2. reduce to skalar
        observation = np.asarray([
            current_theta_dot, current_theta,
            error_current_theta_to_target_theta,
            self.integration_error_stepsize_,
            self.action_servo_command_history[0]
        ])

        return observation

    def compute_reward(self, observation):
        reward = 0
        # exceeds bounds [-20, 20] -> -100
        if np.rad2deg(observation[10]
                      ) > self.envelopeBounds['thetaMax'] or np.rad2deg(
                          observation[10]) < self.envelopeBounds['thetaMin']:
            reward += -100
        # Abweichung abs(target-current) > 1 -> -1
        if np.abs(
                np.rad2deg(observation[10]) -
                self.targetValues['targetTheta']) > 1:
            reward += -1
        # Abweichung abs(target-current) <= 1 -> 10
        if np.abs(
                np.rad2deg(observation[10]) -
                self.targetValues['targetTheta']) <= 1:
            reward += 10
        #reward += -0.01 * np.power((1 * self.action_servo_command_history[0] - 1 * self.action_servo_command_history[1]), 2)
        return reward

    def check_done(self, observation):
        done = 0
        # conditions_if_reset =  all( [30 <= observation[0] <= 50, 30 <= observation[1] <= 50])
        if observation[0] < self.envelopeBounds['speedMin'] or observation[
                0] > self.envelopeBounds['speedMax']:
            print("speed limits", observation[0])
            done = 1
        # conditions_if_reset_speed = any([observation[0] < 30, observation[0] > 50])
        if np.rad2deg(observation[9]) < self.envelopeBounds['phiMin'] or np.rad2deg(observation[9]) > \
                self.envelopeBounds['phiMax']:
            print("roll limits", np.rad2deg(observation[9]))
            done = 1
        # conditions_if_reset_phi = any([self.envelopeBounds['phiMin'] > np.rad2deg(observation[9]), np.rad2deg(observation[9]) > self.envelopeBounds['phiMax']])
        if np.rad2deg(observation[10]) < self.envelopeBounds['thetaMin_grad'] or np.rad2deg(observation[10]) > \
                self.envelopeBounds['thetaMax_grad']:
            print("pitch limits", np.rad2deg(observation[10]))
            done = 1
        return done
示例#4
0
class WrapperOpenAI (gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, stepweite=0.02):
        super(WrapperOpenAI, self).__init__()
        # Define action and observation space
        # They must be gym.spaces objects
        # nur aileron
        high_action_space = np.array([1.], dtype=np.float32)
        self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32)
        # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error
        high_observation_space = np.array([np.inf, np.inf, np.inf, np.inf], dtype=np.float32)
        self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32)
        # reward
        self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32)
        # spezielle Parameter für das Enviroment FDM
        #self.aircraft = Aircraft(config.geometrieClass)  # Ball oder C172
        self.aircraft_beaver = Aircraft_baever()
        self.pid = PidRegler()
        self.dynamicSystem = DynamicSystem6DoF()
        self.umrechnungenKoordinaten = UmrechnungenKoordinaten()
        self.stepweite = stepweite
        self.udpClient = UdpClient('127.0.0.1', 5566)
        # frage: ist das die richtige Stelle, oder besser im DDPG-Controller
        self.targetValues = {'targetPhi_grad': 0,
                             'targetTheta_grad': 0,
                             'targetPsi': 0,
                             'targetSpeed': 0,
                             'target_z_dot': 0.0}
        self.envelopeBounds = {'phiMax_grad': 30,
                               'phiMin_grad': -30,
                               'thetaMax_grad': 30,
                               'thetaMin_grad': -30,
                               'speedMax': 72,
                               'speedMin': 33
                               }

        self.servo_command_elevator = 0
        self.action_servo_command_history_elevator = np.zeros(2)
        self.bandbreite_servo_actions_elevator = 0

        self.servo_command_aileron = 0
        self.action_servo_command_history_aileron = np.zeros(2)
        self.bandbreite_servo_actions_aileron = 0

        # fuer plotten
        self.plotter = PlotState()
        self.anzahlSteps = 0
        self.anzahlEpisoden = 0

    def reset(self):
        np.random.seed()
        self.servo_command_elevator = 0
        self.action_servo_command_history_elevator = np.zeros(2)
        self.bandbreite_servo_actions_elevator = 0

        self.servo_command_aileron = 0
        self.action_servo_command_history_aileron = np.zeros(2)
        self.bandbreite_servo_actions_aileron = 0

        self.anzahlSteps = 1
        self.anzahlEpisoden += 1

        # set targets
        self.targetValues['targetSpeed'] = np.random.uniform(40, 55)  # m/s
        self.targetValues['targetTheta_grad'] = np.random.uniform(0, 0)  # m/s
        self.targetValues['targetPhi_grad'] = np.random.uniform(0,0)  # m/s
        self.targetValues['target_z_dot'] = np.random.uniform(0, 0)  # m/s
        print('new Targets: ', self.targetValues)

        # set state at initial
        u_as_random = np.random.uniform(40, 55)
        phi_as_random = np.deg2rad(np.random.uniform(-0, 0))
        theta_as_random = np.deg2rad(np.random.uniform(-10, 10))

        self.aircraft_beaver.setState(
            np.array([u_as_random, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, phi_as_random, theta_as_random, 0.0]))
        observation = self.user_defined_observation(self.aircraft_beaver.getState(), self.aircraft_beaver.z_dot_g_ks)
        return observation  # reward, done, info can't be included

    def step(self, action_command):
        self.servo_command_elevator = action_command[0]
        self.anzahlSteps += 1
        self.aircraft_beaver.delta_elevator = np.deg2rad(np.clip(self.servo_command_elevator, -1, 1) * (20))
        self.aircraft_beaver.delta_aileron = self.pid._innerLoopAileron(np.deg2rad(0), self.aircraft_beaver.phi, self.aircraft_beaver.p, self.aircraft_beaver.delta_aileron)
        # MultiAgent
        # MultiAgent
        self.servo_command_thrust = action_command[1]
        self.aircraft_beaver.delta_thrust = np.interp(self.servo_command_thrust, [-1, 1], [0.0, 1])  # Übersetzung der Ausgabe KNN zu Thrust-Setting
        # Headline: integrate step
        for x in range(10):
            solver = self.dynamicSystem.integrate(self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(),
                                                  self.aircraft_beaver.getMoments(),
                                                  self.aircraft_beaver.mass, self.aircraft_beaver.inertia,
                                                  self.stepweite)  # def integrate(self, state, mass, inertia, forces, moments, stepweite):
            # State1 in f_ks
            self.aircraft_beaver.setState(np.array(
                [solver.y[0][0], solver.y[1][0], solver.y[2][0], solver.y[3][0], solver.y[4][0], solver.y[5][0],
                 solver.y[6][0], solver.y[7][0], solver.y[8][0], solver.y[9][0], solver.y[10][0], solver.y[11][0]]))
        #set Values for g_ks
        self.aircraft_beaver.x_dot_g_ks, self.aircraft_beaver.y_dot_g_ks, self.aircraft_beaver.z_dot_g_ks = self.umrechnungenKoordinaten.flug2geo(
            [self.aircraft_beaver.u, self.aircraft_beaver.v, self.aircraft_beaver.w], self.aircraft_beaver.phi, self.aircraft_beaver.theta, self.aircraft_beaver.psi)
        observation = self.user_defined_observation(self.aircraft_beaver.getState(), self.aircraft_beaver.z_dot_g_ks)
        reward = self.compute_reward(self.aircraft_beaver.getState(), self.aircraft_beaver.z_dot_g_ks)
        done = self.check_done(self.aircraft_beaver.getState())
        # Headline: ab hier für plotten
        self.plotter.addData(self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(), self.aircraft_beaver.getMoments(),
                             self.aircraft_beaver.alpha, self.aircraft_beaver.beta,
                             np.rad2deg(
                                 self.aircraft_beaver.getSteuerflaechenUndMotorStellung()),
                             self.anzahlSteps + self.anzahlEpisoden * 1000)  # Headline ist anzupassen
        self.plotter.add_data_xyz([self.aircraft_beaver.x_geo, self.aircraft_beaver.y_geo, self.aircraft_beaver.z_geo], self.aircraft_beaver.z_dot_g_ks, self.anzahlSteps + self.anzahlEpisoden * 1000)
        self.plotter.add_data_Ziel(self.targetValues['targetSpeed'], self.anzahlSteps + self.anzahlEpisoden * 400)
        return observation, reward, done, {}

    def render(self, mode='human'):
        self.udpClient.send((struct.pack('fff', self.aircraft_beaver.phi, 0, 0)))


    def close(self):
        pass

    def seed(self, seed=None):
        return

    def user_defined_observation(self, aircraft_state_f_ks, z_dot_g_ks):
        current_u = aircraft_state_f_ks[0]
        error_current_u_to_target_u = current_u - self.targetValues['targetSpeed']

        current_theta_grad = np.rad2deg(aircraft_state_f_ks[10])



        user_defined_observation = np.asarray(
            [current_u, error_current_u_to_target_u, z_dot_g_ks, current_theta_grad])

        return user_defined_observation

    def compute_reward(self, aircraft_state_f_ks, z_dot_g_ks):
        reward0 = self.reward_elevator(aircraft_state_f_ks, z_dot_g_ks)
        reward1 = self.reward_thrust(aircraft_state_f_ks)
        return reward0, reward1

    def reward_elevator(self, aircraft_state_f_ks, z_dot_g_ks):
        current_theta_grad = np.rad2deg(aircraft_state_f_ks[10])
        reward0 = 0
        # out of bounds
        if current_theta_grad < self.envelopeBounds['thetaMin_grad'] or current_theta_grad > self.envelopeBounds[
            'thetaMax_grad']:
            reward0 += -1000
        # Zielgröße Sinken/steigen
        if np.abs(self.targetValues['target_z_dot'] - z_dot_g_ks) > 1:
            reward0 += -1
        else:
            reward0 += 100
        return reward0

    def reward_thrust(self, aircraft_state_f_ks):
        current_u = aircraft_state_f_ks[0]
        reward1 = 0
        # out of bounds
        if current_u < self.envelopeBounds['speedMin'] or current_u > self.envelopeBounds[
            'speedMax']:
            reward1 += -1000
        # Zielgröße Sinken/steigen
        if np.abs(self.targetValues['targetSpeed'] - current_u) > 1:
            reward1 += -1
        else:
            reward1 += 100
        return reward1

    def check_done(self, observation):
        done = 0
        # conditions_if_reset =  all( [30 <= observation[0] <= 50, 30 <= observation[1] <= 50])
        if observation[0] < self.envelopeBounds['speedMin'] or observation[0] > self.envelopeBounds['speedMax']:
            print("speed limits", observation[0])
            done = 1
        # conditions_if_reset_speed = any([observation[0] < 30, observation[0] > 50])
        if np.rad2deg(observation[9]) < self.envelopeBounds['phiMin_grad'] or np.rad2deg(observation[9]) > \
                self.envelopeBounds['phiMax_grad']:
            print("roll limits", np.rad2deg(observation[9]))
            done = 1
        # conditions_if_reset_phi = any([self.envelopeBounds['phiMin'] > np.rad2deg(observation[9]), np.rad2deg(observation[9]) > self.envelopeBounds['phiMax']])
        if np.rad2deg(observation[10]) < self.envelopeBounds['thetaMin_grad'] or np.rad2deg(observation[10]) > \
                self.envelopeBounds['thetaMax_grad']:
            print("pitch limits", np.rad2deg(observation[10]))
            done = 1
        return done
示例#5
0
class WrapperOpenAI(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, stepweite=0.02):
        super(WrapperOpenAI, self).__init__()
        # Define action and observation space
        # They must be gym.spaces objects
        # nur aileron
        high_action_space = np.array([1.], dtype=np.float32)
        self.action_space = spaces.Box(low=-high_action_space,
                                       high=high_action_space,
                                       dtype=np.float32)
        # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error
        high_observation_space = np.array([np.inf, np.inf], dtype=np.float32)
        self.observation_space = spaces.Box(low=-high_observation_space,
                                            high=high_observation_space,
                                            dtype=np.float32)
        # reward
        self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32)
        # spezielle Parameter für das Enviroment FDM
        self.aircraft_beaver = Aircraft_baever()
        self.pid = PidRegler()
        self.dynamicSystem = DynamicSystem6DoF()
        self.stepweite = stepweite
        self.udpClient = UdpClient('127.0.0.1', 5566)
        # frage: ist das die richtige Stelle, oder besser im DDPG-Controller
        self.targetValues = {
            'targetPhi': 0,
            'targetTheta': 0,
            'targetPsi': 0,
            'targetSpeed': 0
        }
        self.envelopeBounds = {
            'phiMax': 20,
            'phiMin': -20,
            'thetaMax': 30,
            'thetaMin': -30,
            'speedMax': 72,
            'speedMin': 30
        }

        self.observationErrorAkkumulation = np.zeros(3)
        self.integration_error_stepsize_ = 0
        # fuer plotten
        self.plotter = PlotState()
        self.anzahlSteps = 0
        self.anzahlEpisoden = 0

        self.servo_command = 0
        self.action_servo_command_history = np.zeros(2)
        self.bandbreite_servo_actions = 0

    def reset(self):
        self.observationErrorAkkumulation = np.zeros(3)
        self.integration_error_stepsize_ = 0

        self.servo_command = 0
        self.action_servo_command_history = np.zeros(2)
        self.bandbreite_servo_actions = 0

        self.anzahlSteps = 1
        self.anzahlEpisoden += 1

        # set targets
        self.targetValues['targetSpeed'] = np.random.uniform(47, 63)
        print('new Target: ', self.targetValues)

        # set state at initial
        phi_as_random = np.deg2rad(np.random.uniform(0, 0))
        theta_as_random = np.deg2rad(np.random.uniform(2.5, 3.5))

        self.aircraft_beaver.setState(
            np.array([
                40.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, phi_as_random,
                theta_as_random, 0.0
            ]))
        observation = self.user_defined_observation(
            self.aircraft_beaver.getState())
        return observation  # reward, done, info can't be included

    def step(self, actionThrust):
        self.anzahlSteps += 1
        # action im Intervall [-1,1]
        # mapping auf Begrenzung der Steuerflächen
        self.servo_command = actionThrust[0]
        self.aircraft_beaver.delta_thrust = np.interp(
            actionThrust[0], [-1, 1],
            [0.0, 1])  # Übersetzung der Ausgabe KNN zu Thrust-Setting
        # Headline: theta wird mit PID-Regler stabilisiert
        self.aircraft_beaver.delta_elevator = self.pid._innerLoopElevator(
            np.deg2rad(0), self.aircraft_beaver.theta, self.aircraft_beaver.q,
            self.aircraft_beaver.delta_elevator)
        # Headline: phi wird mit PID-Regler stabilisiert
        self.aircraft_beaver.delta_aileron = self.pid._innerLoopAileron(
            np.deg2rad(0), self.aircraft_beaver.phi, self.aircraft_beaver.p,
            self.aircraft_beaver.delta_aileron)
        # Headline: integrate step
        solver = self.dynamicSystem.integrate(
            self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(),
            self.aircraft_beaver.getMoments(), self.aircraft_beaver.mass,
            self.aircraft_beaver.inertia, self.stepweite
        )  # def integrate(self, state, mass, inertia, forces, moments, stepweite):
        # State1
        self.aircraft_beaver.setState(
            np.array([
                solver.y[0][0], solver.y[1][0], solver.y[2][0], solver.y[3][0],
                solver.y[4][0], solver.y[5][0], solver.y[6][0], solver.y[7][0],
                solver.y[8][0], solver.y[9][0], solver.y[10][0],
                solver.y[11][0]
            ]))
        observation = self.user_defined_observation(
            self.aircraft_beaver.getState())
        reward = self.compute_reward(self.aircraft_beaver.getState())
        done = self.check_done(self.aircraft_beaver.getState())
        # Headline: ab hier für plotten
        self.plotter.addData(
            self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(),
            self.aircraft_beaver.getMoments(), self.aircraft_beaver.alpha,
            self.aircraft_beaver.beta,
            (self.aircraft_beaver.getSteuerflaechenUndMotorStellung()),
            self.anzahlSteps +
            self.anzahlEpisoden * 1000)  #Headline ist anzupassen

        return observation, reward, done, {}

    def render(self, mode='human'):
        self.udpClient.send((struct.pack('fff', self.aircraft_beaver.phi, 0,
                                         0)))

    def close(self):
        pass

    def seed(self, seed=None):
        return

    def user_defined_observation(self, aircraft_state_f_ks):
        current_speed = aircraft_state_f_ks[0]
        error_current_speed_to_target_speed = aircraft_state_f_ks[
            0] - self.targetValues['targetSpeed']

        self.action_servo_command_history = np.roll(
            self.action_servo_command_history,
            len(self.action_servo_command_history) - 1)
        self.action_servo_command_history[-1] = self.servo_command
        self.bandbreite_servo_actions = np.abs(
            np.min(self.action_servo_command_history) -
            np.max(self.action_servo_command_history))

        aircraft_state_f_ks = np.asarray(
            [current_speed, error_current_speed_to_target_speed])

        return aircraft_state_f_ks

    def compute_reward(self, aircraft_state_f_ks):
        current_u = aircraft_state_f_ks[0]
        reward1 = 0
        if current_u < self.envelopeBounds[
                'speedMin'] or current_u > self.envelopeBounds['speedMax']:
            reward1 += -1000
        if np.abs(current_u - self.targetValues['targetSpeed']) > 1:
            reward1 += -1
        else:
            reward1 += 10
        return reward1

    def check_done(self, observation):
        done = 0
        #conditions_if_reset =  all( [30 <= observation[0] <= 50, 30 <= observation[1] <= 50])
        if observation[0] < 30 or observation[0] > 80:
            print("speed limits")
            done = 1
        #conditions_if_reset_speed = any([observation[0] < 30, observation[0] > 50])
        if self.envelopeBounds['phiMin'] > np.rad2deg(
                observation[9]) or np.rad2deg(
                    observation[9]) > self.envelopeBounds['phiMax']:
            print("roll limits", np.rad2deg(observation[9]))
            done = 1
        # conditions_if_reset_phi = any([self.envelopeBounds['phiMin'] > np.rad2deg(observation[9]), np.rad2deg(observation[9]) > self.envelopeBounds['phiMax']])
        if self.envelopeBounds['thetaMin'] > np.rad2deg(
                observation[10]) or np.rad2deg(
                    observation[10]) > self.envelopeBounds['thetaMax']:
            print("pitch limits", np.rad2deg(observation[10]))
            done = 1
        return done
示例#6
0
from FDM.physikFDM.aircraft import Aircraft
from FDM.lfz_controlFDM.pid_regler import PidRegler
from FDM.servicesFDM.umrechnungen_koordinaten import UmrechnungenKoordinaten
from FDM.servicesFDM.plot_state import PlotState
from FDM.physikFDM.dynamic_system6DoF import *
from FDM.config import *
from FDM.servicesFDM.udp_client import UdpClient
from FDM.physikFDM.aircraft_beaver import Aircraft_baever

config = Config()
umrechnungenKoordinaten = UmrechnungenKoordinaten()
plotter = PlotState()
# todo: config anpassen
#aircraft = Aircraft(config.geometrieClass)  # Ball oder C172
aircraft = Aircraft_baever()
pid = PidRegler()
dynamicSystem = DynamicSystem6DoF()
udpClient = UdpClient('127.0.0.1', 5566)

#zeitmessung für develop
import time

summe = 0
start = time.time()
#zeitmessung für develop

#todo: echtzeitanpassung über time und pause...
stepweite = 0.02
steps = 2000
# set initial conditions of a/c
aircraft.setState([40., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])