示例#1
0
 def env_contr():
     """Build the vectorised ``simple_world_comm`` environment.

     The environment from ``simple_world_comm.env()`` is passed through
     ``pad_observations`` and ``pad_action_space``, converted with
     ``aec_to_markov`` and finally wrapped in ``MarkovVectorEnv``.

     NOTE(review): the padding wrappers presumably give every agent the same
     observation/action space shape before vectorisation -- confirm against
     the wrapper documentation.

     Returns:
         The ``MarkovVectorEnv`` wrapping the converted environment.
     """
     padded_env = pad_action_space(pad_observations(simple_world_comm.env()))
     return MarkovVectorEnv(aec_to_markov(padded_env))
示例#2
0
def env_fn():
    """Create a ``MarkovVectorEnv`` around ``simple_world_comm_v0``.

    Starting from ``simple_world_comm_v0.env()``, the environment is passed
    through ``pad_observations``, ``pad_action_space`` and ``aec_to_markov``
    in that order; the result is handed to ``MarkovVectorEnv``.

    Returns:
        The resulting ``MarkovVectorEnv`` instance.
    """
    wrapped = simple_world_comm_v0.env()
    # Apply the conversion steps in the order they must be stacked.
    for apply_step in (pad_observations, pad_action_space, aec_to_markov):
        wrapped = apply_step(wrapped)
    return MarkovVectorEnv(wrapped)
示例#3
0
def test_dehomogenize():
    """Check that ``pad_action_space`` equalises differently sized action spaces.

    Two agents, ``a_0`` and ``a_1``, are given ``Discrete(5)`` and
    ``Discrete(6)``. After wrapping and resetting, every action space must
    report ``n == 6``, and stepping with action ``5`` is exercised.

    NOTE(review): action 5 presumably lies outside the unpadded ``Discrete(5)``
    space, so the final step checks the padded action is accepted -- confirm.
    """
    agent_act_spaces = {}
    for agent_idx in range(2):
        agent_name = "a_{}".format(agent_idx)
        agent_act_spaces[agent_name] = Discrete(5 + agent_idx)

    padded_env = pad_action_space(
        DummyEnv(base_obs, base_obs_space, agent_act_spaces)
    )
    padded_env.reset()

    # Collect every size first, then assert they are all the padded size.
    space_sizes = [space.n for space in padded_env.action_spaces.values()]
    assert all(size == 6 for size in space_sizes)

    padded_env.step(5)
示例#4
0
    }
    base_act_spaces = {"a_{}".format(idx): Discrete(5) for idx in range(2)}

    return DummyEnv(base_obs, base_obs_space, base_act_spaces)


# Wrapped environments used to parametrize test_basic_wrappers. Each entry
# calls new_dummy() itself, so every wrapper gets its own DummyEnv, and all
# entries are constructed once when this module is imported.
wrappers = [
    aec_wrappers.color_reduction(new_dummy(), "R"),
    aec_wrappers.down_scale(new_dummy(), x_scale=5, y_scale=10),
    aec_wrappers.dtype(new_dummy(), np.int32),
    aec_wrappers.flatten(new_dummy()),
    aec_wrappers.reshape(new_dummy(), (64, 3)),
    aec_wrappers.normalize_obs(new_dummy(), env_min=-1, env_max=5.),
    aec_wrappers.frame_stack(new_dummy(), 8),
    aec_wrappers.pad_observations(new_dummy()),
    aec_wrappers.pad_action_space(new_dummy()),
    aec_wrappers.continuous_actions(new_dummy()),
    # agent_indicator is covered with both values of its second argument.
    aec_wrappers.agent_indicator(new_dummy(), True),
    aec_wrappers.agent_indicator(new_dummy(), False),
]


@pytest.mark.parametrize("env", wrappers)
def test_basic_wrappers(env):
    """Smoke-test one wrapped env from ``wrappers``.

    Resets the env, looks up the selected agent's action and observation
    spaces, observes agent ``"a_0"`` and prints the relevant shapes.
    """
    obs = env.reset()
    # Spaces are looked up for whichever agent is selected after the reset.
    act_space = env.action_spaces[env.agent_selection]
    obs_space = env.observation_spaces[env.agent_selection]
    # NOTE(review): observes "a_0" explicitly rather than env.agent_selection;
    # presumably these are the same agent right after reset -- confirm.
    first_obs = env.observe("a_0")
    # Diagnostic output: shapes of the space bounds and of the observation.
    print(obs_space.low.shape)
    print(obs_space.high.shape)
    print(first_obs.shape)