def test_position_scalars_translation():
    """
    Check that the x-positions estimated from the position scalars match the
    grid's x-coordinate after the position scalar fields have been rolled
    along x and the estimated positions have been rolled back by the same
    number of grid points.
    """
    Lx = Ly = 0.5e3  # [m]
    Lz = 1.0e3  # [m]
    dx = dy = dz = 25.0  # [m]

    ds_grid = create_uniform_grid(dL=(dx, dy, dz), L=(Lx, Ly, Lz))
    ds_grid.attrs["xy_periodic"] = True
    nx = int(ds_grid.x.count())
    ny = int(ds_grid.y.count())

    ds_scalars = init_position_scalars(ds=ds_grid)

    # shifts to test: none, two grid points, and int(Lx / dx) grid points
    shifts = (0, 2, int(Lx / dx))
    for n_shift in shifts:
        # roll the data only; the coordinate values stay in place
        ds_rolled = ds_scalars.roll(x=n_shift, roll_coords=False)

        ds_idxs = advtraj_gm_utils.estimate_initial_grid_indecies(
            ds_position_scalars=ds_rolled,
            N_grid=dict(x=nx, y=ny),
        )
        # only a single line along x (first y- and z-index) is checked
        ds_idxs_line = ds_idxs.isel(y=0, z=0)

        ds_posn_est = advtraj_gm_utils.estimate_3d_position_from_grid_indecies(
            ds_grid=ds_rolled,
            i=ds_idxs_line.i,
            j=ds_idxs_line.j,
            k=ds_idxs_line.k,
        )

        x_ref = ds_rolled.x
        # undo the roll before comparing against the reference coordinate
        x_est = ds_posn_est.roll(x=-n_shift, roll_coords=False).x_est
        assert np.allclose(x_ref, x_est)
def run_sarsa():
    """
    Set up the Unity Banana environment and an agent, run it and return the
    result of `run`.

    The state grid is created with `create_uniform_grid` using 30 bins
    between the smallest and the largest value found in the first observation
    vector returned when the environment is reset.

    NOTE(review): despite the function name, the agent constructed here is a
    `QLearningAgent` - confirm whether a SARSA agent was intended.

    Returns whatever `run(q_agent, env)` returns (previously this value was
    computed and then discarded, so the function always returned None).
    """
    # please do not modify the line below
    env = UnityEnvironment(file_name="./Banana_Linux/Banana.x86_64")

    brain_name = env.brain_names[0]
    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment

    # the extremes of the first observation vector bound the state grid
    first_observation = env_info.vector_observations[0]
    low, high = min(first_observation), max(first_observation)
    state_grid = np.array(create_uniform_grid(low, high, bins=30))

    q_agent = QLearningAgent(
        env,
        state_grid,
        brain_name,
        alpha=0.02,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay_rate=0.9995,
        min_epsilon=.01,
        seed=505,
    )
    # hand the result back to the caller instead of dropping it
    return run(q_agent, env)
def test_cyclic_coord_wrapping(grid_style):
    """
    Check that `grid_utils.wrap_periodic_grid_coords` leaves a point at the
    domain center unchanged and maps points displaced from the center by one
    or two domain lengths in x or y back onto the domain center.
    """
    dx = 25.0
    dL = (dx, dx, dx)
    L = (1.0e3, 1.0e3, 500.0)
    ds_grid = create_uniform_grid(dL=dL, L=L, grid_style=grid_style)

    Lx, Ly, _ = L
    Lx_c, Ly_c, Lz_c = L[0] / 2.0, L[1] / 2.0, L[2] / 2.0
    pt_center = (Lx_c, Ly_c, Lz_c)

    # a point in the center of the domain should remain the same
    start_and_wrapped_pt_coords = [(pt_center, pt_center)]
    # one wrap and then two wraps: displaced in -x, +x, -y and +y, all of
    # which should map back to the domain center
    for n_wraps in (1, 2):
        displaced_pts = (
            (Lx_c - n_wraps * Lx, Ly_c, Lz_c),
            (Lx_c + n_wraps * Lx, Ly_c, Lz_c),
            (Lx_c, Ly_c - n_wraps * Ly, Lz_c),
            (Lx_c, Ly_c + n_wraps * Ly, Lz_c),
        )
        start_and_wrapped_pt_coords.extend(
            (pt_displaced, pt_center) for pt_displaced in displaced_pts
        )

    def _make_pt_dataset(pt):
        # store the (x, y, z) components of the point as dataset variables
        ds_pt = xr.Dataset()
        for name, value in zip(("x", "y", "z"), pt):
            ds_pt[name] = value
        return ds_pt

    def _pt_from_dataset(ds_pt):
        # inverse of `_make_pt_dataset`: back to an array ordered (x, y, z)
        return np.array([ds_pt[name] for name in ("x", "y", "z")])

    # which coordinates are passed as cell-centered for each grid style;
    # any other grid style gets none
    cell_centered_by_style = {
        "cell_centered": ["x", "y", "z"],
        "monc": ["x", "y"],
    }
    cell_centered_coords = cell_centered_by_style.get(grid_style, [])

    for pt_start, pt_wrapped_correct in start_and_wrapped_pt_coords:
        ds_pt_wrapped = grid_utils.wrap_periodic_grid_coords(
            ds_grid=ds_grid,
            ds_posn=_make_pt_dataset(pt_start),
            cyclic_coords=("x", "y"),
            cell_centered_coords=cell_centered_coords,
        )
        pt_wrapped = _pt_from_dataset(ds_pt_wrapped)
        np.testing.assert_allclose(pt_wrapped, pt_wrapped_correct)
def test_position_scalar_transforms_are_symmetric():
    """
    Test that mapping point locations to "position scalars", then to grid
    indices and finally back to positions returns the original locations,
    both with and without `xy_periodic` set on the grid
    """
    Lx = Ly = 0.5e3  # [m]
    Lz = 1.0e3  # [m]
    dx = dy = dz = 25.0  # [m]

    ds_grid = create_uniform_grid(dL=(dx, dy, dz), L=(Lx, Ly, Lz))
    nx = int(ds_grid.x.count())
    ny = int(ds_grid.y.count())

    # points evenly spaced between the smallest and largest grid coordinate
    # along each axis, offset by half a grid spacing
    N_pts = 5
    ds_pts = xr.Dataset(coords=dict(pt=np.arange(N_pts)))
    for name, spacing in (("x", dx), ("y", dy), ("z", dz)):
        grid_coord = ds_grid[name]
        pt_values = (
            np.linspace(grid_coord.min(), grid_coord.max(), N_pts) - spacing / 2.0
        )
        ds_pts[name] = ("pt", pt_values)

    for xy_periodic in (True, False):
        ds_grid["xy_periodic"] = xy_periodic

        # positions -> position scalars
        ds_scalars_pts = advtraj_gm_utils.grid_locations_to_position_scalars(
            ds_grid=ds_grid, ds_pts=ds_pts
        )
        # position scalars -> grid indices
        ds_idxs_pts = advtraj_gm_utils.estimate_initial_grid_indecies(
            ds_position_scalars=ds_scalars_pts,
            N_grid=dict(x=nx, y=ny),
        )
        # grid indices -> positions
        ds_pts_est = advtraj_gm_utils.estimate_3d_position_from_grid_indecies(
            ds_grid=ds_grid,
            i=ds_idxs_pts.i,
            j=ds_idxs_pts.j,
            k=ds_idxs_pts.k,
        )

        for name in ("x", "y", "z"):
            np.testing.assert_allclose(ds_pts[name], ds_pts_est[f"{name}_est"])
def preprocess_state(state, state_grid):
    """Return the discretized representation of a continuous state."""
    discretized_state = discretize(state, state_grid)
    return discretized_state


if __name__ == "__main__":
    # please do not modify the line below
    env = UnityEnvironment(file_name="./Banana_Linux/Banana.x86_64")

    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # the extremes of the first observation vector bound the state grid
    low = min(env_info.vector_observations[0])
    high = max(env_info.vector_observations[0])
    state_grid = np.array(create_uniform_grid(low, high, bins=30))

    q_agent = QLearningAgent(
        env,
        state_grid,
        brain_name,
        alpha=0.02,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay_rate=0.9995,
        min_epsilon=.01,
        seed=505,
    )
    scores = run(q_agent, env)