import gym

from env.Helicopter import Helicopter
from env.controller import Controller


class state_finder:
    def __init__(self):
        self.my_heli = Helicopter()
        self.my_contr = Controller()
        self.ENV_ID = "CustomEnv-v0"  # ENV_ID = "CartPole-v0"
        self.env = gym.make(self.ENV_ID)
        self.env.current_states = self.env.reset()

    def get_action(self, current_states, sl_action=[1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]):
        # evaluate the hand-written controller for the current state and simulation time
        action = self.my_contr.Controller_model(current_states, self.env.dt * self.env.counter, sl_action)
        return action
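# Usage sketch (not part of the original file): build a state_finder and query a
# single controller action for the environment's initial state. Whether this
# action is then fed directly into env.step, or combined with an RL action, is
# an assumption left open here.
if __name__ == "__main__":
    finder = state_finder()
    action = finder.get_action(finder.env.current_states)
    print("controller action for the initial state:", action)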
# __init__ of the custom helicopter environment ("CustomEnv-v0"). In the original
# file, sp (sympy), np (numpy), spaces (gym.spaces), Helicopter, Controller and
# save_files are imported at module level.
def __init__(self):
    # symbolic control inputs and 16-element state vector for the helicopter dynamics
    self.U_input = [U1, U2, U3, U4] = sp.symbols("U1:5", real=True)
    self.x_state = [
        u_velocity,
        v_velocity,
        w_velocity,
        p_angle,
        q_angle,
        r_angle,
        fi_angle,
        theta_angle,
        si_angle,
        xI,
        yI,
        zI,
        a_flapping,
        b_flapping,
        c_flapping,
        d_flapping,
    ] = sp.symbols("x1:17", real=True)
    self.My_helicopter = Helicopter()
    self.My_controller = Controller()
    self.t = sp.symbols("t")
    self.symbolic_states_math, jacobian = self.My_helicopter.lambd_eq_maker(self.t, self.x_state, self.U_input)

    # per-state bounds used to build the gym observation space
    self.default_range = default_range = (-20, 20)
    self.observation_space_domain = {
        "u_velocity": default_range,
        "v_velocity": default_range,
        "w_velocity": default_range,
        "p_angle": default_range,
        "q_angle": default_range,
        "r_angle": default_range,
        "fi_angle": default_range,
        "theta_angle": default_range,
        "si_angle": default_range,
        "xI": default_range,
        "yI": default_range,
        "zI": default_range,
        "a_flapping": default_range,
        "b_flapping": default_range,
        "c_flapping": default_range,
        "d_flapping": default_range,
    }
    self.low_obs_space = np.array(list(zip(*self.observation_space_domain.values()))[0], dtype=np.float32)
    self.high_obs_space = np.array(list(zip(*self.observation_space_domain.values()))[1], dtype=np.float32)
    self.observation_space = spaces.Box(low=self.low_obs_space, high=self.high_obs_space, dtype=np.float32)

    # per-action bounds; additional controller-parameter actions are kept commented out
    self.default_act_range = default_act_range = (-0.3, 0.3)
    self.action_space_domain = {
        "deltacol": default_act_range,
        "deltalat": default_act_range,
        "deltalon": default_act_range,
        "deltaped": default_act_range,
        # "f1": (0.1, 5), "f2": (0.5, 20), "f3": (0.5, 20), "f4": (0.5, 10),
        # "lambda1": (0.5, 10), "lambda2": (0.1, 5), "lambda3": (0.1, 5), "lambda4": (0.1, 5),
        # "eta1": (0.2, 5), "eta2": (0.1, 5), "eta3": (0.1, 5), "eta4": (0.1, 5),
    }
    self.low_action_space = np.array(list(zip(*self.action_space_domain.values()))[0], dtype=np.float32)
    self.high_action_space = np.array(list(zip(*self.action_space_domain.values()))[1], dtype=np.float32)
    self.action_space = spaces.Box(low=self.low_action_space, high=self.high_action_space, dtype=np.float32)

    # simulation horizon and episode bookkeeping
    self.min_reward = -13
    self.t_start, self.dt, self.t_end = 0, 0.03, 2
    self.no_timesteps = int((self.t_end - self.t_start) / self.dt)
    self.all_t = np.linspace(self.t_start, self.t_end, self.no_timesteps)
    self.counter = 0
    self.best_reward = -100_000_000
    self.longest_num_step = 0
    self.reward_check_time = 0.7 * self.t_end
    self.high_action_diff = 0.2

    # CSV header for logging: time, actions, observations, rewards
    obs_header = str(list(self.observation_space_domain.keys()))[1:-1]
    act_header = str(list(self.action_space_domain.keys()))[1:-1]
    self.header = "time, " + act_header + ", " + obs_header + ", reward" + ", control reward"
    self.saver = save_files()
    self.reward_array = np.zeros(12, dtype=np.float32)
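# The scripts below call gym.make("CustomEnv-v0"), which only works once that id
# has been registered with gym. A minimal registration sketch follows; the
# entry_point module/class path is a hypothetical placeholder, not taken from
# the original repository.
from gym.envs.registration import register

register(
    id="CustomEnv-v0",
    entry_point="env.CustomEnv:CustomEnv",  # hypothetical module:Class path
    max_episode_steps=66,  # int((t_end - t_start) / dt) for the values above
)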
# import csv
import gym
import numpy as np
from simple_pid import PID

from env.Helicopter import Helicopter
from env.controller import Controller
from finding_random_states_and_actions import state_finder

my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100_000_000_000
# print(sl_action)
done = False
# [0.8, 0, 0] lateral
k_bin = np.linspace(-1, 1, num=10)  # candidate proportional (Kp) gains
i_bin = np.linspace(-1, 1, num=5)  # candidate integral (Ki) gains
current_action = [0, 0, 0, 0]
sl_action = [1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]
# pid_lat = PID(3.2, 2, -3.5, setpoint=-1.08e-01)
# pid_lon = PID(0.2, 0, 0, setpoint=0)
# pid_col = PID(-2, -3, -0.3, setpoint=-1)
# pid_ped = PID(-1, -1, -0.5, setpoint=0)
for i in range(len(k_bin)):
    for j in range(len(i_bin)):
        kk = k_bin[i]
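# The inner body of the gain sweep is cut off above. A hedged, self-contained
# sketch of how one (kk, ii) pair could be scored: build a longitudinal PID from
# the candidate gains (mirroring the commented-out "PID(kk, ii, 0, ...)" line in
# the companion script), roll one episode, and accumulate the reward. The state
# index used for feedback and the action slot written to are assumptions for
# illustration only.
def evaluate_gains(kk, ii, env):
    pid_lon = PID(kk, ii, 0, setpoint=-1.08e-01)
    env.current_states = env.reset()
    action = [0.0, 0.0, 0.0, 0.0]
    total_reward, done = 0.0, False
    while not done:
        action[2] = pid_lon(env.current_states[7])  # feed back theta_angle (assumed state index 7)
        env.current_states, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward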
import gym
import torch

from env.Helicopter import Helicopter
from env.controller import Controller
from finding_random_states_and_actions import state_finder

# ActorCritic, inputDim, outputDim and device are assumed to be defined in the
# accompanying training code; this script only evaluates a saved policy.
epochs = 200000
hidden_size = 400
model = ActorCritic(inputDim, outputDim, hidden_size).to(device)
model.load_state_dict(torch.load("model.pt", map_location=device))
model.eval()


def trch_ft_device(values, device):
    # convert a state list/array into a float tensor on the target device
    output = torch.FloatTensor(values).to(device)
    return output


my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100_000_000_000
done = False
my_env.current_states = my_env.reset()
while not done:
    state = trch_ft_device(my_env.current_states, device).unsqueeze(0)  # add a batch dimension
    dist, value = model(state)
    current_action = dist.sample()
    # move the sampled action back to the CPU before handing it to the environment
    my_env.current_states, b, done, _ = my_env.step(list(current_action.cpu().numpy().squeeze()))
    if my_env.best_reward > current_best_rew:
        current_best_rew = my_env.best_reward
        # print("updated", sl_action)
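# The ActorCritic class loaded above lives in the training code, which is not
# shown here. A common shape for it, consistent with how it is called
# (dist, value = model(state) followed by dist.sample()), is a Gaussian policy
# head plus a value head; the sketch below is an assumption of that form, not
# the original model definition.
import torch
import torch.nn as nn
from torch.distributions import Normal


class ActorCritic(nn.Module):
    def __init__(self, num_inputs, num_outputs, hidden_size, std=0.0):
        super().__init__()
        # value head: state -> scalar value estimate
        self.critic = nn.Sequential(nn.Linear(num_inputs, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1))
        # policy head: state -> action mean; a learned log-std sets the Gaussian spread
        self.actor = nn.Sequential(nn.Linear(num_inputs, hidden_size), nn.ReLU(), nn.Linear(hidden_size, num_outputs))
        self.log_std = nn.Parameter(torch.ones(1, num_outputs) * std)

    def forward(self, x):
        value = self.critic(x)
        mu = self.actor(x)
        std = self.log_std.exp().expand_as(mu)
        return Normal(mu, std), value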
import gym
import numpy as np
from simple_pid import PID

from env.Helicopter import Helicopter
from env.controller import Controller
from finding_random_states_and_actions import state_finder

my_state_finder = state_finder()
my_heli = Helicopter()
my_contr = Controller()
ENV_ID = "CustomEnv-v0"
my_env = gym.make(ENV_ID)
current_best_rew = -100_000_000_000
# print(sl_action)
done = False
# [0.8, 0, 0] lateral
k_bin = np.linspace(-1, 1, num=10)
i_bin = np.linspace(-1, 1, num=5)
current_action = [0, 0, 0, 0]
sl_action = [1, 10, 10, 5, 5, 1, 1, 2, 2, 1, 1, 1, 1]
# pid_lon = PID(kk, ii, 0, setpoint=-1.08e-01)
pid_lon = PID(0.2, 0.2, 0.01, setpoint=1.01e-04)  # controls q, theta
pid_lat = PID(0.2, 0.1, 0.01, setpoint=-1.08e-01)  # controls p, fi
pid_col = PID(-2, -3, -0.3, setpoint=0)
pid_ped = PID(-1, -1, -0.5, setpoint=-1.03e-03)
my_env.current_states = my_env.reset()
done = False
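# The episode loop that uses these four PID controllers is not included above. A
# hedged sketch, assuming the state ordering from the environment's
# observation_space_domain (r_angle=5, fi_angle=6, theta_angle=7, zI=11) and the
# action ordering (deltacol, deltalat, deltalon, deltaped):
while not done:
    current_action[0] = pid_col(my_env.current_states[11])  # collective from zI
    current_action[1] = pid_lat(my_env.current_states[6])  # lateral cyclic from fi_angle
    current_action[2] = pid_lon(my_env.current_states[7])  # longitudinal cyclic from theta_angle
    current_action[3] = pid_ped(my_env.current_states[5])  # pedal from r_angle
    my_env.current_states, reward, done, _ = my_env.step(current_action)
    if my_env.best_reward > current_best_rew:
        current_best_rew = my_env.best_reward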