示例#1
0
def test_identity_multidiscrete(model_class):
    """
    Train a model on the multidiscrete identity environment, roll it out,
    and check the shapes returned by ``action_probability``.

    Note that the rewards collected during the rollout are not asserted on:
    only the output shapes of ``action_probability`` are verified.

    :param model_class: (BaseRLModel) A RL Model
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(10)])

    model = model_class("MlpPolicy", env)
    model.learn(total_timesteps=1000, seed=0)

    # Roll the trained policy out so the checks below run on an observation
    # reached by stepping the environment with the model's own actions.
    # The reward is discarded at unpacking instead of being summed into an
    # accumulator that nothing reads.
    n_trials = 1000
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, _, _, _ = env.step(action)

    # Without explicit actions, the full distribution is expected.
    # NOTE(review): (2, 1, 10) presumably reads as
    # (action dimensions, n_envs, choices per dimension) -- confirm.
    assert np.array(model.action_probability(obs)).shape == (2, 1, 10), \
        "Error: action_probability not returning correct shape"
    # With a sampled action, a single probability value is expected.
    assert np.prod(model.action_probability(obs, actions=env.action_space.sample()).shape) == 1, \
        "Error: not scalar probability"
示例#2
0
def test_identity_multi_discrete(model_class):
    """
    Check vectorisation detection on the MultiDiscrete identity environment
    by delegating to ``check_shape``.

    :param model_class: (BaseRLModel) the RL model
    """
    def make_env():
        return IdentityEnvMultiDiscrete(dim=10)

    # NOTE(review): presumably the expected action shapes for the plain
    # environment and for its vectorized counterpart -- confirm against
    # check_shape.
    first_shape = (2, )
    second_shape = (1, 2)

    check_shape(make_env, model_class, first_shape, second_shape)
示例#3
0
def test_identity_multi_discrete(model_class):
    """
    Check that ``predict`` returns actions of the expected shape for both a
    plain MultiDiscrete identity environment and a vectorized one.

    :param model_class: (BaseRLModel) the RL model
    """
    training_env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(dim=10)])
    model = model_class(policy="MlpPolicy", env=training_env)

    # Each case pairs an environment with the action shape expected from it.
    cases = (
        (IdentityEnvMultiDiscrete(dim=10), (2,)),
        (DummyVecEnv([lambda: IdentityEnvMultiDiscrete(dim=10)]), (1, 2)),
    )

    n_steps = 100
    for current_env, wanted_shape in cases:
        observation = current_env.reset()
        for _ in range(n_steps):
            action, _ = model.predict(observation)
            assert np.shape(action) == wanted_shape
            observation = current_env.step(action)[0]

    # Free memory
    del model, current_env
def test_identity_multidiscrete(model_class):
    """
    Train a model on the multidiscrete identity environment, evaluate it,
    and check the shapes returned by ``action_probability``.

    :param model_class: (BaseRLModel) A RL Model
    """
    vec_env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(10)])

    agent = model_class("MlpPolicy", vec_env)
    agent.learn(total_timesteps=1000)
    evaluate_policy(agent, vec_env, n_eval_episodes=5)
    first_obs = vec_env.reset()

    # Full distribution: queried without explicit actions.
    full_probas = np.array(agent.action_probability(first_obs))
    assert full_probas.shape == (2, 1, 10), \
        "Error: action_probability not returning correct shape"

    # Probability of one sampled action: expected to hold a single value.
    sampled_action = vec_env.action_space.sample()
    single_proba = agent.action_probability(first_obs, actions=sampled_action)
    assert np.prod(single_proba.shape) == 1, \
        "Error: not scalar probability"
def test_identity_multidiscrete(model_class):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    with a multidiscrete action space

    :param model_class: (BaseRLModel) A RL Model
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(10)])

    model = model_class("MlpPolicy", env)
    model.learn(total_timesteps=1000, seed=0)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward