def run(num_envs=16, hidden_dim=256, batch_size=1024, iterations=1000,
        log_interval=10, runs=1):
    """Train a 3D waypoint agent ``runs`` times and log the rewards to CSV.

    Args:
        num_envs: Number of entries built with ``tl.make_nh_waypoint_3d`` and
            handed to ``SubprocVecEnv``.
        hidden_dim: Hidden size passed to ``ag.Agent``.
        batch_size: Forwarded unchanged to ``tl.train_mp``.
        iterations: Forwarded unchanged to ``tl.train_mp``.
        log_interval: Forwarded unchanged to ``tl.train_mp``.
        runs: Number of freshly initialised agents to train.

    Results are written to ``<cwd>/nh_waypoint_3d/data.csv`` with one
    ``timesteps`` column (taken from the first run) and one ``run<i>`` column
    per training run.
    """
    env_entries = []
    for _ in range(num_envs):
        env_entries.append(tl.make_nh_waypoint_3d())
    vec_envs = SubprocVecEnv(env_entries)

    # A separate, single environment is passed to the trainer alongside the
    # vectorised ones; it is also the source of the network dimensions.
    t_env = tenv.WaypointEnv3D()
    action_dim = t_env.action_space.shape[0]
    state_dim = t_env.observation_space.shape[0]

    path = os.getcwd() + "/nh_waypoint_3d/"
    checkpoint_prefix = path + "gaussian_" + str(2)

    results = None
    for run_idx in range(runs):
        agent = ag.Agent(state_dim, hidden_dim, action_dim, dim=3)
        optimizer = torch.optim.Adam(agent.parameters(), lr=1e-4)
        timesteps, rewards, agent = tl.train_mp(
            vec_envs, t_env, agent, optimizer, batch_size, iterations,
            log_interval, render=False, fname=checkpoint_prefix)

        if results is None:
            # First run: create the table and record the shared x-axis.
            results = pd.DataFrame()
            results["timesteps"] = timesteps
        results["run" + str(run_idx)] = rewards

        # NOTE(review): the file is rewritten after every run (assumed from the
        # original layout), so partial results are on disk if a run fails.
        results.to_csv(path + "data.csv", index=False)
def run(hidden_dim=256):
    """Evaluate a saved 3D waypoint policy over 100 rendered episodes.

    Loads ``<cwd>/nh_waypoint_3d/gaussian_2_term.pth.tar`` into a freshly
    built ``ag.Agent`` and prints the mean of the values returned by
    ``tl.test``.

    Args:
        hidden_dim: Hidden size passed to ``ag.Agent``; presumably has to match
            the checkpoint being loaded.
    """
    def _keep_storage(storage, loc):
        # Return the storage untouched instead of moving it to the device it
        # was saved from.
        return storage

    checkpoint = os.getcwd()+"/nh_waypoint_3d/" + "gaussian_2_term.pth.tar"

    t_env = tenv.WaypointEnv3D()
    obs_size = t_env.observation_space.shape[0]
    act_size = t_env.action_space.shape[0]
    agent = ag.Agent(obs_size, hidden_dim, act_size, dim=3)
    print("Agent initialized, loading state dictionary.")

    weights = torch.load(checkpoint, map_location=_keep_storage)
    agent.load_state_dict(weights)
    print()
    print("State dictionary loaded")

    episode_rewards = []
    for _ in range(100):
        episode_rewards.append(tl.test(t_env, agent, render=True))
    mean_reward = sum(episode_rewards)/len(episode_rewards)
    print("Mean reward: ", mean_reward)
def run(hidden_dim=256):
    """Evaluate a saved 2D trajectory policy over 100 rendered episodes.

    Loads ``<cwd>/_2d/soft_2d/3-wps_.pth.tar`` into an ``ag.Agent`` built with
    ``dim=2`` and ``lookahead=3``, then prints the mean of the values returned
    by ``tl.test``.

    Args:
        hidden_dim: Hidden size passed to ``ag.Agent``; presumably has to match
            the checkpoint being loaded.
    """
    def _keep_storage(storage, loc):
        # Return the storage untouched instead of moving it to the device it
        # was saved from.
        return storage

    model_dir = os.getcwd() + "/_2d/soft_2d/"
    checkpoint = model_dir + "3-wps_.pth.tar"

    t_env = tenv.TrajectoryEnv2D()
    obs_size = t_env.observation_space.shape[0]
    act_size = t_env.action_space.shape[0]
    agent = ag.Agent(obs_size, hidden_dim, act_size, dim=2, lookahead=3)
    print("Agent initialized, loading state dictionary.")

    weights = torch.load(checkpoint, map_location=_keep_storage)
    agent.load_state_dict(weights)
    print()
    print("State dictionary loaded")

    episode_rewards = []
    for _ in range(100):
        episode_rewards.append(tl.test(t_env, agent, render=True))
    mean_reward = sum(episode_rewards) / len(episode_rewards)
    print("Mean reward: ", mean_reward)
def run(num_envs=16, hidden_dim=256, batch_size=1024, iterations=1000,
        log_interval=10, runs=3):
    """Train a 2D trajectory agent ``runs`` times and log the rewards to CSV.

    Args:
        num_envs: Number of entries built with ``tl.make_traj_2d`` and handed
            to ``SubprocVecEnv``.
        hidden_dim: Hidden size passed to ``ag.Agent``.
        batch_size: Forwarded unchanged to ``tl.train_mp``.
        iterations: Forwarded unchanged to ``tl.train_mp``.
        log_interval: Forwarded unchanged to ``tl.train_mp``.
        runs: Number of freshly initialised agents to train.

    NOTE(review): ``cfg``, ``lookahead`` and ``wps`` are not defined in this
    function; they are presumably module-level names (``wps`` is concatenated
    with strings, so it is expected to be a ``str``) -- confirm in the caller.
    """
    env_entries = []
    for _ in range(num_envs):
        env_entries.append(tl.make_traj_2d())
    vec_envs = SubprocVecEnv(env_entries)

    # Reconfigure the single environment: set the number of future waypoints
    # from the config and replace the observation space with one sized to it.
    t_env = tenv.TrajectoryEnv2D()
    t_env.num_fut_wp = int(cfg.waypoints-1)
    obs_size = 10+7*(t_env.num_fut_wp+1)
    t_env.observation_space = gym.spaces.Box(-1, 1, shape=(obs_size,))

    state_dim = t_env.observation_space.shape[0]
    action_dim = t_env.action_space.shape[0]

    path = os.getcwd()+"/_2d/traj_2d/"
    checkpoint_prefix = path+wps+"-wps_"
    csv_file = path+"data_wp-"+wps+".csv"

    results = None
    for run_idx in range(runs):
        agent = ag.Agent(state_dim, hidden_dim, action_dim, dim=2,
                         lookahead=lookahead)
        optimizer = torch.optim.Adam(agent.parameters(), lr=cfg.lr)
        timesteps, rewards, agent = tl.train_mp(
            vec_envs, t_env, agent, optimizer, batch_size, iterations,
            log_interval, render=False, fname=checkpoint_prefix)

        if results is None:
            # First run: create the table and record the shared x-axis.
            results = pd.DataFrame()
            results["timesteps"] = timesteps
        results["run"+str(run_idx)] = rewards

        # NOTE(review): the file is rewritten after every run (assumed from the
        # original layout), so partial results are on disk if a run fails.
        results.to_csv(csv_file, index=False)
def run(hidden_dim=256):
    """Plot the first series returned by ``test`` against time for a saved policy.

    Loads ``<cwd>/waypoint_3d/gaussian_2_term.pth.tar`` into an ``ag.Agent``
    built with ``dim=3``, calls ``test`` with the environment and agent, and
    saves a line plot to ``./figures/value_density.png``.

    Args:
        hidden_dim: Hidden size passed to ``ag.Agent``; presumably has to match
            the checkpoint being loaded.
    """
    def _keep_storage(storage, loc):
        # Return the storage untouched instead of moving it to the device it
        # was saved from.
        return storage

    model_dir = os.getcwd() + "/waypoint_3d/"
    checkpoint = model_dir + "gaussian_2_term.pth.tar"

    t_env = tenv.WaypointEnv3D()
    obs_size = t_env.observation_space.shape[0]
    act_size = t_env.action_space.shape[0]
    agent = ag.Agent(obs_size, hidden_dim, act_size, dim=3)
    print("Agent initialized, loading state dictionary.")

    weights = torch.load(checkpoint, map_location=_keep_storage)
    agent.load_state_dict(weights)
    print()
    print("State dictionary loaded")

    # ``test`` yields three values; only the first and the last are plotted.
    rollout = test(t_env, agent)
    uvws, _pqrs, ts = rollout

    fig = plt.figure(figsize=(5, 5))
    axes = fig.add_subplot(111)
    axes.plot(ts, uvws)
    axes.set_xlabel('time (s)')
    axes.set_ylabel('u (m/s)')
    axes.set_xlim([0, 3])
    axes.set_ylim([-1.5, 1.5])

    # NOTE(review): the output name says "value_density" although this is a
    # velocity-vs-time plot -- possibly a leftover from another script; confirm.
    plt.savefig('./figures/value_density.png')
    print("figure saved")
def run(hidden_dim=256):
    """Render a heat map of ``exp(-value)`` over a 2D grid of body-frame offsets.

    Loads ``<cwd>/waypoint_2d/gaussian_2_term.pth.tar`` into an ``ag.Agent``,
    queries ``agent.get_integrated_value`` on a 30 x 30 grid spanning
    x in [0, 3] and y in [-1.5, 1.5], and saves the resulting colour mesh to
    ``./figures/value_density.png``.

    Args:
        hidden_dim: Hidden size passed to ``ag.Agent``; presumably has to match
            the checkpoint being loaded.

    NOTE(review): ``device`` and ``exp`` are not defined in this function and
    are presumably module-level names -- confirm in the surrounding file.
    """
    path = os.getcwd() + "/waypoint_2d/"
    fname = path + "gaussian_2_term.pth.tar"

    t_env = tenv.WaypointEnv2D()
    state_dim = t_env.observation_space.shape[0]
    action_dim = t_env.action_space.shape[0]
    agent = ag.Agent(state_dim, hidden_dim, action_dim)
    print("Agent initialized, loading state dictionary.")
    agent.load_state_dict(
        torch.load(fname, map_location=lambda storage, loc: storage))
    print()
    print("State dictionary loaded")

    xs = np.linspace(0, 3, 30)
    ys = np.linspace(-1.5, 1.5, 30)
    XS, YS = np.meshgrid(xs, ys)
    VALUE = np.zeros(XS.shape)
    for i, x in enumerate(xs):
        for j, y in enumerate(ys):
            state = torch.Tensor(np.array([x, y])).to(device)
            value = agent.get_integrated_value(state).item()
            # np.meshgrid defaults to "xy" indexing: rows of XS/YS follow ys
            # and columns follow xs, so the sample at (xs[i], ys[j]) belongs in
            # row j, column i. Writing VALUE[i, j] would draw the field
            # transposed (x and y swapped) in pcolormesh.
            VALUE[j, i] = exp(-value)

    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(111)
    ax.pcolormesh(XS, YS, VALUE, cmap="plasma")
    ax.set_xlabel('body x distance (m)')
    ax.set_ylabel('body y distance (m)')
    ax.set_xlim([0, 3])
    ax.set_ylim([-1.5, 1.5])
    plt.savefig('./figures/value_density.png')
    print("figure saved")