def env_contr():
    """Construct the vectorized ``simple_world_comm`` environment.

    The freshly created environment is passed through ``pad_observations``
    and ``pad_action_space``, converted with ``aec_to_markov``, and the
    result is wrapped in ``MarkovVectorEnv``.

    Returns:
        The ``MarkovVectorEnv`` built around the converted environment.
    """
    # Both padding wrappers are applied before the AEC -> Markov conversion,
    # in the same order as the original step-by-step assignments.
    padded_env = pad_action_space(pad_observations(simple_world_comm.env()))
    return MarkovVectorEnv(aec_to_markov(padded_env))
def env_fn():
    """Create the vectorized ``simple_world_comm_v0`` environment.

    Pipeline, in order: ``simple_world_comm_v0.env()`` ->
    ``pad_observations`` -> ``pad_action_space`` -> ``aec_to_markov`` ->
    ``MarkovVectorEnv``.

    Returns:
        The ``MarkovVectorEnv`` wrapping the converted environment.
    """
    raw_env = simple_world_comm_v0.env()
    obs_padded = pad_observations(raw_env)
    fully_padded = pad_action_space(obs_padded)
    return MarkovVectorEnv(aec_to_markov(fully_padded))
def test_dehomogenize():
    """Check ``pad_action_space`` on agents with unequal ``Discrete`` spaces.

    Two agents, ``a_0`` and ``a_1``, are given ``Discrete(5)`` and
    ``Discrete(6)`` action spaces.  After wrapping, every action space
    exposed by the environment must report ``n == 6``, and stepping with
    action ``5`` is exercised.
    """
    # Agent idx gets Discrete(5 + idx), so the sizes are 5 and 6.
    agent_spaces = {}
    for agent_idx in range(2):
        agent_spaces["a_{}".format(agent_idx)] = Discrete(5 + agent_idx)

    padded_env = pad_action_space(
        DummyEnv(base_obs, base_obs_space, agent_spaces)
    )
    padded_env.reset()

    # Every space must have been widened to the largest size.
    assert all(space.n == 6 for space in padded_env.action_spaces.values())

    # NOTE(review): action 5 is outside the original Discrete(5) space;
    # presumably this verifies the wrapper accepts the padded action for
    # whichever agent acts first -- confirm against DummyEnv's agent order.
    padded_env.step(5)
} base_act_spaces = {"a_{}".format(idx): Discrete(5) for idx in range(2)} return DummyEnv(base_obs, base_obs_space, base_act_spaces) wrappers = [ aec_wrappers.color_reduction(new_dummy(), "R"), aec_wrappers.down_scale(new_dummy(), x_scale=5, y_scale=10), aec_wrappers.dtype(new_dummy(), np.int32), aec_wrappers.flatten(new_dummy()), aec_wrappers.reshape(new_dummy(), (64, 3)), aec_wrappers.normalize_obs(new_dummy(), env_min=-1, env_max=5.), aec_wrappers.frame_stack(new_dummy(), 8), aec_wrappers.pad_observations(new_dummy()), aec_wrappers.pad_action_space(new_dummy()), aec_wrappers.continuous_actions(new_dummy()), aec_wrappers.agent_indicator(new_dummy(), True), aec_wrappers.agent_indicator(new_dummy(), False), ] @pytest.mark.parametrize("env", wrappers) def test_basic_wrappers(env): obs = env.reset() act_space = env.action_spaces[env.agent_selection] obs_space = env.observation_spaces[env.agent_selection] first_obs = env.observe("a_0") print(obs_space.low.shape) print(obs_space.high.shape) print(first_obs.shape)