Пример #1
0
    def __init__(self):

        self.chaser = Drone()
        self.target = Drone()
        self.target_controller = controller(self.target.get_arm_length(), self.target.get_mass())

        self.state_chaser = np.zeros(13)
        self.state_target = np.zeros(13)
        self.rel_state = np.zeros(12)
        self.t = 0

        self.done = False
        self.reward = 0.0
        self.shaping = 0.0
        self.last_shaping = 0.0

        # self.steps_beyond_done = None

        # Chaser Initial State
        chaser_ini_pos = np.array([8, -50, 5]) + np.random.uniform(-0.3, 0.3, (3,))
        chaser_ini_vel = np.array([0, 0, 0])# + np.random.uniform(-0.1, 0.1, (3,))
        chaser_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))# + np.random.uniform(-0.2, 0.2, (3,)))
        chaser_ini_angular_rate = np.array([0.0, 0.0, 0.0]) #+ np.random.uniform(-0.1, 0.1, (3,))
        self.chaser_dock_port = np.array([0.1, 0.0, 0.0])
        self.chaser_ini_state = np.zeros(13)
        self.chaser_ini_state[0:3] = chaser_ini_pos
        self.chaser_ini_state[3:6] = chaser_ini_vel
        self.chaser_ini_state[6:10] = chaser_ini_att
        self.chaser_ini_state[10:] = chaser_ini_angular_rate
        self.state_chaser = self.chaser.reset(self.chaser_ini_state, self.chaser_dock_port)

        # Target Initial State
        target_ini_pos = np.array([10, -50, 5])
        target_ini_vel = np.array([0.0, 0.0, 0.0])
        target_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))
        target_ini_angular_rate = np.array([0.0, 0.0, 0.0])
        self.target_dock_port = np.array([-0.1, 0, 0])
        self.target_ini_state = np.zeros(13)
        self.target_ini_state[0:3] = target_ini_pos
        self.target_ini_state[3:6] = target_ini_vel
        self.target_ini_state[6:10] = target_ini_att
        self.target_ini_state[10:] = target_ini_angular_rate
        self.state_target = self.target.reset(self.target_ini_state, self.target_dock_port)

        # Target Final State
        target_pos_des = np.array([10, -50, 5])  # [x, y, z]
        target_att_des = euler2quat(np.array([0.0, 0.0, 0.0]))
        self.target_state_des = np.zeros(13)
        self.target_state_des[0:3] = target_pos_des
        self.target_state_des[6:10] = target_att_des

        # Final Relative Error
        self.rel_pos_threshold = 1
        self.rel_vel_threshold = 0.1
        self.rel_att_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])
        self.rel_att_rate_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])

        # chaser_dp = self.chaser.get_dock_port_state()  # drone A
        # target_dp = self.target.get_dock_port_state()  # drone B
        self.rel_state = state2rel(self.state_chaser, self.state_target, self.chaser.get_dock_port_state(),
                                   self.target.get_dock_port_state())

        # State Limitation
        chaser_low = self.chaser.state_lim_low
        chaser_high = self.chaser.state_lim_high

        target_low = self.target.state_lim_low
        target_high = self.target.state_lim_high

        # obs rel info: 12x1 [rel_pos, rel_vel, rel_rpy, rel_rpy_rate]
        self.obs_low = np.array(
            [-np.inf, -np.inf, -np.inf, -100, -100, -100, -np.pi, -np.pi / 2, -np.pi, -10 * np.pi, -10 * np.pi,
             -10 * np.pi])
        self.obs_high = np.array(
            [np.inf, np.inf, np.inf, 100, 100, 100, np.pi, np.pi / 2, np.pi, 10 * np.pi, 10 * np.pi, 10 * np.pi])

        # rel_low = np.array([60, 0, 100, 10, 10, 10, 1, 1, 1, 1, 10 * 2 * np.pi, 10 * 2 * np.pi, 10 * 2 * np.pi])

        self.action_space = spaces.Box(low=np.array([-1.0, -1.0, -1.0, -1.0]), high=np.array([1.0, 1.0, 1.0, 1.0]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=self.obs_low, high=self.obs_high, dtype=np.float32)

        # self.action_max = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity
        self.action_mean = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0
        self.action_std = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0

        self.seed()
Пример #2
0
env = gym.make('gym_docking:docking-v1')

total_step = 1500
rel_state = np.zeros((total_step, 12))
state = np.zeros((total_step, 12))
rpy = np.zeros((total_step, 3))
time = np.zeros(total_step)
u_all = np.zeros((total_step, 4))

done = False
tf = 0
info_lst = []
rewards = []
obs = env.reset()

control = controller(env.chaser.get_arm_length(), env.chaser.get_mass())
state_des = env.chaser_ini_state

kp = 0.35
kd = 0


def generate_PID_expert_traj(save_path=None,
                             env=None,
                             n_timesteps=0,
                             n_episodes=1500,
                             image_folder='recorded_images'):
    """
    Train expert controller (if needed) and record expert trajectories.

    .. note::
Пример #3
0
ini_state[6:10] = ini_att
ini_state[10:] = ini_angular_rate

pos_des = np.array([10, -50, 5])  # [x, y, z]
vel_des = np.array([0, 0, 0])
att_des = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[3:6] = vel_des
state_des[6:10] = att_des

# Initial a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state)

control = controller(quad1.get_arm_length(), quad1.get_mass())

# Control Command
u = np.zeros(quad1.dim_u)
# u[0] = quad1.get_mass() * 9.81
# u[3] = 0.2

total_step = 1500
state = np.zeros((total_step, 13))
state_des_all = np.zeros((total_step, 13))
rpy = np.zeros((total_step, 3))
time = np.zeros(total_step)
u_all = np.zeros((total_step, 4))

kp = 0.35
kd = 0
Пример #4
0
import gym
from gym import wrappers, logger
import numpy as np
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, euler2quat

att_des = euler2quat(np.array([deg2rad(0), deg2rad(0), deg2rad(0)]))
pos_des = np.array([0, 0, 1])  # [x, y, z]
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = att_des
control = controller(0.086, 0.18)

env = gym.make('gym_docking:hovering-v0')

obs = env.reset()
for i in range(1000):
    env.seed(0)
    action = control.PID(state_des, obs)
    obs, reward, dones, info = env.step(action)
    # print('obs: ', obs)
    print('reward: ', reward)
    print('dones: ', dones)