def test_position_scalars_translation():
    """
    Check that x-positions estimated from the position scalars still match the
    grid x-values after the scalar fields have been rolled along x
    """
    # domain extent and grid spacing, all in [m]
    Lx = Ly = 0.5e3
    Lz = 1.0e3
    dx = dy = dz = 25.0

    ds_grid = create_uniform_grid(dL=(dx, dy, dz), L=(Lx, Ly, Lz))
    ds_grid.attrs["xy_periodic"] = True
    nx = int(ds_grid.x.count())
    ny = int(ds_grid.y.count())

    ds = init_position_scalars(ds=ds_grid)

    # no shift, a two-point shift and a shift by int(Lx / dx) grid points
    shifts = [0, 2, int(Lx / dx)]
    for n_shift in shifts:
        # move the data along x while leaving the coordinate values in place
        ds_rolled = ds.roll(x=n_shift, roll_coords=False)

        idxs = advtraj_gm_utils.estimate_initial_grid_indecies(
            ds_position_scalars=ds_rolled, N_grid={"x": nx, "y": ny}
        )
        # only a single line along x (first y- and z-index) is checked
        idxs_line = idxs.isel(y=0, z=0)

        ds_posn_est = advtraj_gm_utils.estimate_3d_position_from_grid_indecies(
            ds_grid=ds_rolled, i=idxs_line.i, j=idxs_line.j, k=idxs_line.k
        )

        # roll the estimate back by the same amount before comparing
        x_expected = ds_rolled.x
        x_recovered = ds_posn_est.roll(x=-n_shift, roll_coords=False).x_est
        assert np.allclose(x_expected, x_recovered)
예제 #2
0
def run_sarsa():
    """Train a tabular agent on the Unity Banana environment.

    Starts the environment, derives scalar lower/upper bounds from the first
    entry of the initial vector observations, builds a 30-bin uniform state
    grid from those bounds and hands a freshly constructed agent to ``run``.

    Returns:
        The value returned by ``run(q_agent, env)``. Earlier versions of this
        function discarded that value and returned ``None``.
    """
    # NOTE(review): the function is named "sarsa" but constructs a
    # QLearningAgent -- confirm which algorithm is intended.

    # please do not modify the line below
    env = UnityEnvironment(file_name="./Banana_Linux/Banana.x86_64")
    brain_name = env.brain_names[0]

    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment

    # the bounds are the smallest / largest component of the first observation
    first_observation = env_info.vector_observations[0]
    low, high = min(first_observation), max(first_observation)
    state_grid = np.array(create_uniform_grid(low, high, bins=30))

    q_agent = QLearningAgent(env,
                             state_grid,
                             brain_name,
                             alpha=0.02,
                             gamma=0.99,
                             epsilon=1.0,
                             epsilon_decay_rate=0.9995,
                             min_epsilon=.01,
                             seed=505)

    # hand the scores back to the caller instead of dropping them
    return run(q_agent, env)
예제 #3
0
def test_cyclic_coord_wrapping(grid_style):
    """
    Check that points displaced by one or two domain lengths in x or y are
    wrapped back onto the domain center, and that the center itself is left
    where it is
    """
    spacing = 25.0
    L = (1.0e3, 1.0e3, 500.0)
    ds_grid = create_uniform_grid(
        dL=(spacing, spacing, spacing), L=L, grid_style=grid_style
    )

    Lx, Ly, _ = L
    center = (L[0] / 2.0, L[1] / 2.0, L[2] / 2.0)
    xc, yc, zc = center

    # displaced starting points: first a single wrap in -x, +x, -y, +y and
    # then the same four directions for two wraps
    displaced_pts = []
    for n_wraps in (1, 2):
        displaced_pts += [
            (xc - n_wraps * Lx, yc, zc),
            (xc + n_wraps * Lx, yc, zc),
            (xc, yc - n_wraps * Ly, zc),
            (xc, yc + n_wraps * Ly, zc),
        ]

    # a point in the center of the domain should remain the same, every
    # displaced point should be mapped back to the center
    cases = [(center, center)] + [(pt, center) for pt in displaced_pts]

    axis_names = ("x", "y", "z")

    def _as_dataset(pt):
        # one scalar variable per axis
        ds_pt = xr.Dataset()
        for name, value in zip(axis_names, pt):
            ds_pt[name] = value
        return ds_pt

    def _as_array(ds_pt):
        return np.array([ds_pt[name] for name in axis_names])

    # which coordinates are treated as cell-centered for each grid style; any
    # other style gets none
    cell_centered_by_style = {
        "cell_centered": ["x", "y", "z"],
        "monc": ["x", "y"],
    }
    cell_centered_coords = cell_centered_by_style.get(grid_style, [])

    for pt_start, pt_expected in cases:
        ds_pt_wrapped = grid_utils.wrap_periodic_grid_coords(
            ds_grid=ds_grid,
            ds_posn=_as_dataset(pt_start),
            cyclic_coords=("x", "y"),
            cell_centered_coords=cell_centered_coords,
        )

        np.testing.assert_allclose(_as_array(ds_pt_wrapped), pt_expected)
def test_position_scalar_transforms_are_symmetric():
    """
    Round-trip check: positions converted to "position scalars", then to grid
    indices and back to positions must reproduce the starting positions
    """
    # domain extent and grid spacing, all in [m]
    Lx = Ly = 0.5e3
    Lz = 1.0e3
    dx = dy = dz = 25.0

    ds_grid = create_uniform_grid(dL=(dx, dy, dz), L=(Lx, Ly, Lz))
    nx = int(ds_grid.x.count())
    ny = int(ds_grid.y.count())

    N_pts = 5
    ds_pts = xr.Dataset(coords=dict(pt=np.arange(N_pts)))
    # along each axis, spread the points evenly between the smallest and
    # largest grid coordinate and then move them down by half a grid spacing
    for name, spacing in zip(("x", "y", "z"), (dx, dy, dz)):
        grid_coord = ds_grid[name]
        evenly_spaced = np.linspace(grid_coord.min(), grid_coord.max(), N_pts)
        ds_pts[name] = "pt", evenly_spaced - spacing / 2.0

    for xy_periodic in [True, False]:
        # NOTE(review): the flag is stored as a data variable here rather than
        # in ds_grid.attrs -- confirm this is where the helpers read it from
        ds_grid["xy_periodic"] = xy_periodic

        ds_scalars = advtraj_gm_utils.grid_locations_to_position_scalars(
            ds_grid=ds_grid, ds_pts=ds_pts
        )
        ds_idxs = advtraj_gm_utils.estimate_initial_grid_indecies(
            ds_position_scalars=ds_scalars, N_grid={"x": nx, "y": ny}
        )
        ds_pts_est = advtraj_gm_utils.estimate_3d_position_from_grid_indecies(
            ds_grid=ds_grid, i=ds_idxs.i, j=ds_idxs.j, k=ds_idxs.k
        )

        # the estimate for each axis is stored under "<axis>_est"
        for name in ("x", "y", "z"):
            np.testing.assert_allclose(ds_pts[name], ds_pts_est[name + "_est"])
예제 #5
0
def preprocess_state(state, state_grid):
    """Return the discretized form of a continuous ``state``.

    Thin wrapper: both arguments are forwarded unchanged to ``discretize``.
    """
    discretized = discretize(state, state_grid)
    return discretized


# Script entry point: start the Unity Banana environment, build a uniform
# state grid from the first observation and run a QLearningAgent on it.
# Every name assigned below is a module-level global.
if __name__ == "__main__":

    # please do not modify the line below
    env = UnityEnvironment(file_name="./Banana_Linux/Banana.x86_64")
    # use the first brain the environment lists
    brain_name = env.brain_names[0]
    # NOTE(review): `brain` is not referenced again in this block -- check
    # whether other code reads it as a global before removing it
    brain = env.brains[brain_name]

    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment

    # scalar bounds: the smallest and largest component of the first entry of
    # the vector observations
    low, high = min(env_info.vector_observations[0]), max(
        env_info.vector_observations[0])
    # 30-bin grid between those bounds, converted to a numpy array
    state_grid = create_uniform_grid(low, high, bins=30)
    state_grid = np.array(state_grid)

    # epsilon starts at 1.0 with decay rate 0.9995 and a minimum of 0.01
    # (how these are applied is up to QLearningAgent); fixed seed
    q_agent = QLearningAgent(env,
                             state_grid,
                             brain_name,
                             alpha=0.02,
                             gamma=0.99,
                             epsilon=1.0,
                             epsilon_decay_rate=0.9995,
                             min_epsilon=.01,
                             seed=505)

    # run the agent in the environment and keep what `run` returns
    scores = run(q_agent, env)