def test_serialization(dim=(8, 8), num_boxes=1, mode='rgb_array', seed=None, curriculum=300):
    """Round-trip a small game through serialize_game / deserialize_game.

    Builds a SokobanEnv, captures one (state, observation, value) triple,
    duplicates it into a two-step game, serializes and deserializes it, and
    compares every field of the result with the original.

    Args:
        dim: Room dimensions passed to SokobanEnv as ``dim_room``.
        num_boxes: Number of boxes passed to SokobanEnv.
        mode: Mode passed to the SokobanEnv constructor.
        seed: Environment seed. Only ``None`` triggers drawing a fresh seed
            from ``seeding.np_random``; an explicit ``0`` is used as given.
        curriculum: Curriculum value passed to SokobanEnv.

    Returns:
        A list with one entry per game step; each entry is a list of
        booleans, one per field (state, observation, value), telling whether
        the deserialized field equals the original element-wise.
    """
    # `is None` rather than truthiness so that seed=0 is a valid explicit seed.
    if seed is None:
        _, seed = seeding.np_random(None)

    env = SokobanEnv(dim_room=dim, max_steps=100, num_boxes=num_boxes,
                     mode=mode, curriculum=curriculum)
    env.seed(seed)
    env.reset()

    state = env.clone_full_state()
    obs = env.render(mode='rgb_array')
    value = np.float32(5.0)

    shapes = (state.shape, obs.shape, (1, ))
    # Named `dtypes` to avoid shadowing the builtin `type`.
    dtypes = (state.dtype, obs.dtype, np.float32)
    # Element count of each field, scaled by the episode length limit.
    buf_size = env.max_steps * np.array([np.prod(x) for x in shapes])

    game = [(state, obs, value), (state, obs, value)]
    serial = serialize_game(game, dtypes, buf_size)
    dgame = deserialize_game(serial, buf_size, shapes, dtypes)

    return [[(i == j).all() for i, j in zip(a, b)]
            for a, b in zip(game, dgame)]
def create_env(seed, dim_room=(13, 13), num_boxes=5):
    """Construct a seeded SokobanEnv in 'rgb_array' mode.

    Args:
        seed: Value forwarded to ``env.seed``.
        dim_room: Room dimensions for the environment.
        num_boxes: Number of boxes in the room.

    Returns:
        The seeded SokobanEnv instance (not reset).
    """
    settings = dict(
        dim_room=dim_room,
        max_steps=100,
        num_boxes=num_boxes,
        mode='rgb_array',
        max_distinct_rooms=10,
    )
    sokoban = SokobanEnv(**settings)
    sokoban.seed(seed)
    return sokoban
def test_recover(dim=(13, 13), num_boxes=5, mode='rgb_array', seed=None):
    """Print whether ``recover_state`` reproduces the cloned full state.

    Creates and resets a SokobanEnv, renders an observation, clones the full
    state, and prints the result of comparing that state with
    ``env.recover_state(obs)``.

    Args:
        dim: Room dimensions passed to SokobanEnv as ``dim_room``.
        num_boxes: Number of boxes passed to SokobanEnv.
        mode: Mode passed to the SokobanEnv constructor.
        seed: Environment seed. Only ``None`` triggers drawing a fresh seed
            from ``seeding.np_random``; an explicit ``0`` is used as given.
    """
    # `is None` rather than truthiness so that seed=0 is a valid explicit seed.
    if seed is None:
        _, seed = seeding.np_random(None)

    env = SokobanEnv(dim_room=dim, max_steps=100, num_boxes=num_boxes,
                     mode=mode, max_distinct_rooms=10)
    env.seed(seed)
    env.reset()

    obs = env.render()
    state = env.clone_full_state()
    print(state == env.recover_state(obs))
def generate_next_frame_and_done_data(env_kwargs, seed, n_trajectories=100, trajectory_len=40, clone_done=100):
    """Build next-frame / done training arrays from random Sokoban rollouts.

    The number of boxes is drawn from the ``"num_boxes_range"`` entry of
    ``next_frame_and_done_data_params()``; when that entry is ``None`` the
    value in ``env_kwargs["num_boxes"]`` is used instead. Each rollout comes
    from ``random_trajectory`` and is unpacked as
    ``(observations, actions, done)``.

    Args:
        env_kwargs: Keyword arguments for SokobanEnv. A deep copy is modified;
            the caller's dict is left untouched.
        seed: Seeds both ``np.random`` (global state) and the environment.
        n_trajectories: Number of rollouts to generate.
        trajectory_len: Length argument forwarded to ``random_trajectory``.
        clone_done: When a rollout ends with ``done`` and this is greater
            than 1, the rollout's last sample is duplicated this many times.

    Returns:
        A tuple ``(data_x, data_y, {})`` where ``data_x`` is an array of
        observations with the action embedded, and ``data_y`` maps
        ``Target.NEXT_FRAME.value`` to the following observations and
        ``"if_done"`` to an integer column of done flags.
    """
    box_choices = next_frame_and_done_data_params()["num_boxes_range"]
    if box_choices is None:
        print("num_boxes_range", box_choices)
        box_choices = [env_kwargs["num_boxes"]]

    # Work on a private copy so the caller's kwargs are not modified.
    env_kwargs = deepcopy(env_kwargs)
    np.random.seed(seed)
    picked = np.random.randint(len(box_choices))
    env_kwargs["num_boxes"] = box_choices[picked]

    render_env = SokobanEnv(**env_kwargs)
    render_env.seed(seed)

    # Each entry: (observations, actions, done).
    rollouts = []
    for _ in range(n_trajectories):
        render_env.reset()
        start_state = render_env.clone_full_state()
        # Random walk starting from the freshly reset state.
        rollouts.append(
            random_trajectory(start_state, render_env, trajectory_len))

    # Flatten the rollouts into parallel sample lists.
    inputs = []
    next_frames = []
    done_flags = []
    for frames, moves, finished in rollouts:
        for frame, move in zip(frames[:-1], moves):
            inputs.append(
                image_with_embedded_action(
                    frame, move, render_env.action_space.n))
        next_frames.extend(frames[1:])
        # Every step but the last is labelled not-done; the last carries
        # the rollout's done flag.
        done_flags.extend([False] * (len(moves) - 1))
        done_flags.append(finished)

        if finished and clone_done > 1:
            # Repeat the final (done) sample clone_done times.
            final_input = inputs[-1]
            inputs.extend([final_input.copy() for _ in range(clone_done)])
            final_frame = next_frames[-1]
            next_frames.extend(
                [final_frame.copy() for _ in range(clone_done)])
            final_flag = done_flags[-1]
            done_flags.extend([final_flag] * clone_done)

    data_x = np.array(inputs)
    data_y = {
        Target.NEXT_FRAME.value: np.array(next_frames),
        "if_done": np.array(done_flags).reshape((-1, 1)).astype(int),
    }
    return data_x, data_y, {}
def test_seed(dim=(13, 13), num_boxes=5, mode='rgb_array', seed=None):
    """Create a seeded SokobanEnv, print the seed, and display the first frame.

    Args:
        dim: Room dimensions passed to SokobanEnv as ``dim_room``.
        num_boxes: Number of boxes passed to SokobanEnv.
        mode: Accepted for signature parity with the other test helpers but
            not used: the environment is always built with
            ``mode='rgb_array'`` because the frame is displayed as an RGB
            image.
        seed: Environment seed. Only ``None`` triggers drawing a fresh seed
            from ``seeding.np_random``; an explicit ``0`` is used as given.
    """
    from ctypes import c_uint

    # `is None` rather than truthiness so that seed=0 is a valid explicit seed.
    if seed is None:
        _, seed = seeding.np_random(None)

    env = SokobanEnv(dim_room=dim, max_steps=100, num_boxes=num_boxes,
                     mode='rgb_array')
    env.seed(seed)
    # Report the seed as an unsigned 32-bit value.
    print("Seed: {}".format(np.uint32(c_uint(seed))))

    from PIL import Image

    env.reset()
    img = env.render()
    # Upscale to 200x200 and open in the default image viewer.
    Image.fromarray(img, "RGB").resize((200, 200)).show()