def test_serialization(dim=(8, 8),
                       num_boxes=1,
                       mode='rgb_array',
                       seed=None,
                       curriculum=300):
    """Round-trip a two-step game through serialize/deserialize.

    A freshly reset ``SokobanEnv`` supplies a cloned state and a rendered
    observation; together with a constant ``float32`` value they form one
    step, which is duplicated to build a two-step game. The game is passed
    through ``serialize_game`` and ``deserialize_game`` and compared
    field by field with the original.

    Args:
        dim: Room dimensions forwarded to ``SokobanEnv`` as ``dim_room``.
        num_boxes: Number of boxes forwarded to ``SokobanEnv``.
        mode: Mode forwarded to ``SokobanEnv``.
        seed: Environment seed. When ``None`` a fresh seed is drawn via
            ``seeding.np_random(None)``; any other value (including ``0``)
            is used as given.
        curriculum: Curriculum setting forwarded to ``SokobanEnv``.

    Returns:
        A list with one entry per step; each entry is a list of three
        booleans (state, observation, value) telling whether the
        deserialized field equals the original everywhere.
    """
    if seed is None:
        # Only the second element of the returned pair is used as the seed.
        _, seed = seeding.np_random(None)
    env = SokobanEnv(dim_room=dim,
                     max_steps=100,
                     num_boxes=num_boxes,
                     mode=mode,
                     curriculum=curriculum)
    env.seed(seed)
    env.reset()

    state = env.clone_full_state()
    obs = env.render(mode='rgb_array')
    value = np.float32(5.0)

    shapes = (state.shape, obs.shape, (1, ))
    dtypes = (state.dtype, obs.dtype, np.float32)
    # Per-field buffer size: element count of one step times max_steps.
    buf_size = env.max_steps * np.array([np.prod(shape) for shape in shapes])

    game = [(state, obs, value), (state, obs, value)]
    serial = serialize_game(game, dtypes, buf_size)
    dgame = deserialize_game(serial, buf_size, shapes, dtypes)

    return [[(original == restored).all()
             for original, restored in zip(step, restored_step)]
            for step, restored_step in zip(game, dgame)]
def create_env(seed, dim_room=(13, 13), num_boxes=5):
    """Construct a seeded ``SokobanEnv`` without resetting it.

    Args:
        seed: Value passed to ``env.seed``.
        dim_room: Room dimensions for the environment.
        num_boxes: Number of boxes for the environment.

    Returns:
        The seeded environment, built with ``max_steps=100``,
        ``mode='rgb_array'`` and ``max_distinct_rooms=10``.
    """
    settings = {
        "dim_room": dim_room,
        "max_steps": 100,
        "num_boxes": num_boxes,
        "mode": 'rgb_array',
        "max_distinct_rooms": 10,
    }
    sokoban = SokobanEnv(**settings)
    sokoban.seed(seed)
    return sokoban
def test_recover(dim=(13, 13), num_boxes=5, mode='rgb_array', seed=None):
    """Print whether the state recovered from a render matches the real one.

    Resets a seeded ``SokobanEnv``, renders it, clones its full state and
    prints the result of comparing that state with
    ``env.recover_state(obs)``.

    Args:
        dim: Room dimensions forwarded to ``SokobanEnv`` as ``dim_room``.
        num_boxes: Number of boxes forwarded to ``SokobanEnv``.
        mode: Mode forwarded to ``SokobanEnv``.
        seed: Environment seed. When ``None`` a fresh seed is drawn via
            ``seeding.np_random(None)``; any other value (including ``0``)
            is used as given.
    """
    if seed is None:
        # Only the second element of the returned pair is used as the seed.
        _, seed = seeding.np_random(None)
    env = SokobanEnv(dim_room=dim,
                     max_steps=100,
                     num_boxes=num_boxes,
                     mode=mode,
                     max_distinct_rooms=10)
    env.seed(seed)
    env.reset()
    obs = env.render()
    state = env.clone_full_state()
    # The comparison result is printed as-is, not reduced to a single bool.
    print(state == env.recover_state(obs))
# Example no. 4
# 0
def generate_next_frame_and_done_data(env_kwargs,
                                      seed,
                                      n_trajectories=100,
                                      trajectory_len=40,
                                      clone_done=100):
    """Generate training arrays for next-frame and done prediction.

    Random trajectories are rolled out in a ``SokobanEnv``. Every
    (observation, action) pair becomes one input sample; the following
    observation and a done flag become its targets.

    Args:
        env_kwargs: Keyword arguments for ``SokobanEnv``. A deep copy is
            modified, so the caller's dict is left untouched.
        seed: Seeds both NumPy's global RNG and the environment.
        n_trajectories: Number of random trajectories to generate.
        trajectory_len: Length passed to ``random_trajectory``.
        clone_done: When a trajectory ends done and this is greater than 1,
            its last sample is appended this many additional times.

    Returns:
        A tuple ``(data_x, data_y, {})``. ``data_x`` is an array of
        action-embedded images. ``data_y`` maps
        ``Target.NEXT_FRAME.value`` to the next-frame array and
        ``"if_done"`` to an integer column vector of done flags.
    """
    num_boxes_range = next_frame_and_done_data_params()["num_boxes_range"]
    if num_boxes_range is None:
        print("num_boxes_range", num_boxes_range)
        num_boxes_range = [env_kwargs["num_boxes"]]
    env_kwargs = deepcopy(env_kwargs)
    # Seeds the *global* NumPy RNG; kept global because code called below
    # may draw from it as well (NOTE(review): confirm for random_trajectory).
    np.random.seed(seed)
    env_kwargs["num_boxes"] = num_boxes_range[np.random.randint(
        len(num_boxes_range))]

    render_env = SokobanEnv(**env_kwargs)
    render_env.seed(seed)
    trajectories = list()  # [(observations, actions, done), ...]
    for _ in range(n_trajectories):
        render_env.reset()
        state = render_env.clone_full_state()
        # generate random path
        trajectories.append(
            random_trajectory(state, render_env, trajectory_len))

    # parse trajectories into arrays
    data_x = list()
    data_y_next_frame = list()
    data_y_if_done = list()

    for obs, actions, done in trajectories:
        if len(actions) == 0:
            # No transitions: the original code would still append a lone
            # done label here (and possibly clone a sample from an earlier
            # trajectory), leaving inputs and targets misaligned.
            continue

        # Assumes obs holds one more frame than there are actions, so that
        # obs[:-1] / obs[1:] pair up with actions -- TODO confirm.
        data_x.extend([
            image_with_embedded_action(ob, action, render_env.action_space.n)
            for ob, action in zip(obs[:-1], actions)
        ])
        data_y_next_frame.extend(obs[1:])
        # Only the final transition of a trajectory can carry done=True.
        data_y_if_done.extend([False] * (len(actions) - 1) + [done])

        if done and (clone_done > 1):
            # Repeat the terminal sample clone_done more times.
            data_x.extend([data_x[-1].copy() for _ in range(clone_done)])
            data_y_next_frame.extend(
                [data_y_next_frame[-1].copy() for _ in range(clone_done)])
            data_y_if_done.extend(
                [data_y_if_done[-1] for _ in range(clone_done)])

    data_x = np.array(data_x)
    data_y = {
        Target.NEXT_FRAME.value: np.array(data_y_next_frame),
        "if_done": np.array(data_y_if_done).reshape((-1, 1)).astype(int),
    }
    return data_x, data_y, {}
def test_seed(dim=(13, 13), num_boxes=5, mode='rgb_array', seed=None):
    """Seed an environment, print the seed and display the initial room.

    Args:
        dim: Room dimensions forwarded to ``SokobanEnv`` as ``dim_room``.
        num_boxes: Number of boxes forwarded to ``SokobanEnv``.
        mode: Accepted but not used; the environment is always created
            with ``mode='rgb_array'``.
        seed: Environment seed. When ``None`` a fresh seed is drawn via
            ``seeding.np_random(None)``; any other value (including ``0``)
            is used as given.
    """
    # Local imports: only this helper needs ctypes and PIL.
    from ctypes import c_uint
    from PIL import Image

    if seed is None:
        # Only the second element of the returned pair is used as the seed.
        _, seed = seeding.np_random(None)
    env = SokobanEnv(dim_room=dim,
                     max_steps=100,
                     num_boxes=num_boxes,
                     mode='rgb_array')
    env.seed(seed)
    # The seed is passed through c_uint and np.uint32 before being printed.
    print("Seed: {}".format(np.uint32(c_uint(seed))))
    env.reset()
    img = env.render()
    # Upscale to 200x200 and open the image in the default viewer.
    Image.fromarray(img, "RGB").resize((200, 200)).show()