Exemplo n.º 1
0
    def __init__(self, data_folder, config_file, play):
        self.render_filename = None
        self.mass = None
        self.inertia_tensor = None
        self.rotors = []
        self.wing = None

        # initialize constant value
        self.gravity = np.array([0, 0, 9.8])
        self.dt_mean = 0.01
        self.dt_std = 0.005
        self.total_iter = 2000

        # parse xml config file
        self.parse_config_file(data_folder + config_file)

        # construct rendering environment
        if play:
            from mujoco_rendering_env import mujoco_env
            self.render_env = mujoco_env.MujocoEnv(model_path=data_folder +
                                                   self.render_filename)
        else:
            self.render_env = None
        self.render_intervel = int(1.0 / 50.0 / self.dt_mean)

        # self.render_env._get_viewer()

        # noise related
        self.noisy_body = True
        self.noisy_sensor = True
        self.state_noise_std = np.array([
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.005, 0.005, 0.005, 0.0005, 0.0005,
            0.0005
        ])
        self.noisy_rotor = False
        self.noisy_aerodynamics = True
        self.simulate_delay = True
        self.delay = 0.04
        self.delay_mean = 0.04
        self.delay_std = 0.01
        self.noisy_dt = True
        self.constrain_motor_output = True
        self.motor_constrain_clip = 0.4
        self.real_rotors = []
        for i in range(len(self.rotors)):
            self.real_rotors.append(
                Rotor(self.rotors[i].position, self.rotors[i].direction,
                      self.rotors[i].clockwise, self.rotors[i].torque_coef))

        # integral term
        self.I_dt = self.dt_mean
        self.I_error = None

        # mass property
        self.real_mass = self.mass
        self.real_inertia_tensor = self.inertia_tensor.copy()

        # initialize rigid body
        self.rigid_body = rigid_body.RigidBody(
            mass=self.mass, inertia_body=self.inertia_tensor)
        self.state = None

        # train or play
        self.play = play

        # training variables
        self.seed()
        self.iter = None
        self.epoch = 0
        self.timesofar = None
        self.target = np.zeros(4)
        self.state_his = None
        self.I_error = np.zeros(4)

        # construct action space
        self.max_thrust = 7.0
        action_low = np.ones(len(self.rotors)) * -1.0 * self.max_thrust / 2.0
        action_high = np.ones(len(self.rotors)) * self.max_thrust / 2.0
        self.action_space = spaces.Box(action_low,
                                       action_high,
                                       dtype=np.float32)

        # construct observation space
        ob = self.get_observation_vector()
        ob_low = np.ones(len(ob)) * (-np.finfo(np.float32).max)
        ob_high = np.ones(len(ob)) * (np.finfo(np.float32).max)
        self.observation_space = spaces.Box(ob_low, ob_high, dtype=np.float32)
Exemplo n.º 2
0
    def reset(self):
        # generate epoch-level noise
        self.reset_noise()

        # set initial state
        initial_pos = np.array([0, 0, -4])

        initial_orientation = Quaternion(axis=[1, 0, 0], angle=0)

        axis = np.random.uniform(low=-1.0, high=1.0, size=3)
        angle = np.random.normal(0.0, 0.2)
        initial_orientation = Quaternion(axis=axis, angle=angle)

        initial_velocity = np.zeros(3)

        initial_velocity = np.random.normal(0.0, 0.5, size=3)

        initial_omega = np.zeros(3)

        initial_omega = np.random.normal(0.0, 0.05, size=3)

        self.rigid_body = rigid_body.RigidBody(mass = self.mass, inertia_body = self.inertia_tensor, \
                                                position = initial_pos, orientation = initial_orientation, \
                                                velocity = initial_velocity, angular_velocity = initial_omega)

        mode = self.np_random.uniform(low=0.0, high=2.0)
        if mode < 1.0:
            self.target_vx = self.np_random.uniform(low=3.0, high=6.0)
            self.target_vy = 0.0
            self.target_vz = self.np_random.uniform(low=-1.0, high=1.0)
        else:
            self.target_vx = self.np_random.uniform(low=-1.0, high=1.0)
            self.target_vy = self.np_random.uniform(low=-1.0, high=1.0)
            self.target_vz = self.np_random.uniform(low=-1.0, high=1.0)
        self.target = np.array(
            [self.target_vx, self.target_vy, self.target_vz, 0])

        # training variables
        self.epoch += 1
        self.iter = 0
        self.timesofar = 0
        self.last_action = np.zeros(len(self.rotors))
        self.I_error = np.zeros(4)
        self.I_error = np.random.normal(0.0, 0.5, size=4)

        # for visualization
        self.accumulate_reward = 0
        self.rpy_his = [[], [], []]
        self.vel_local_his = [[], [], []]
        self.target_vel_his = [[], [], []]
        self.action_his = [[], [], [], []]
        self.omega_his = [[], [], []]
        self.state_his = []
        self.time_his = []
        self.error_his = [[], [], [], []]
        self.I_error_his = [[], [], [], []]
        self.aoa_his = []
        self.reward_his = None

        self.update_state()
        observation_vector = self.get_observation_vector()

        #       print('observation_vector is: ',observation_vector)

        return observation_vector