Exemplo n.º 1
0
def test_passreward(env_name='PerceptualDecisionMaking-v0',
                    num_steps=1000,
                    verbose=False):
    """
    Check that the pass-reward wrapper exposes the reward in the observation.

    The environment is created with ``gym.make``, wrapped with ``PassReward``
    and driven with randomly sampled actions. After every step the last
    entry of the observation must be equal to the reward returned by that
    same step; otherwise an AssertionError is raised.

    Parameters
    ----------
    env_name : str, optional
        environment to wrap. The default is 'PerceptualDecisionMaking-v0'.
    num_steps : int, optional
        number of steps to run the environment (1000)
    verbose : boolean, optional
        whether to print observation and reward (False)

    Returns
    -------
    None.

    """
    wrapped = PassReward(gym.make(env_name))
    # The initial observation is not inspected; only stepped ones are checked.
    wrapped.reset()
    for _ in range(num_steps):
        sampled = wrapped.action_space.sample()
        obs, rew, done, _info = wrapped.step(sampled)
        assert obs[-1] == rew, 'Previous reward is not part of observation'
        if verbose:
            for item in (obs, rew, '--------'):
                print(item)
        if done:
            # Start a new episode so the remaining steps can still run.
            wrapped.reset()
Exemplo n.º 2
0
def test_passreward(env_name, num_steps=10000, verbose=False, **envArgs):
    """
    Test the pass-reward wrapper on an environment built with custom args.

    The environment is created with ``gym.make(env_name, **envArgs)``,
    wrapped with ``PassReward`` and stepped with randomly sampled actions.
    After every step the last entry of the observation is checked against
    the reward returned by that step, so the test fails if the wrapper does
    not pass the reward through.

    Parameters
    ----------
    env_name : str
        environment to wrap.
    num_steps : int, optional
        number of steps to run the environment (10000)
    verbose : boolean, optional
        whether to print observation and reward (False)
    **envArgs : dict, optional
        extra keyword arguments forwarded to ``gym.make``.

    Returns
    -------
    None.

    Raises
    ------
    AssertionError
        if the last element of an observation differs from the step reward.

    """
    env = gym.make(env_name, **envArgs)
    env = PassReward(env)
    # The initial observation is not checked; only stepped ones are.
    env.reset()
    for _ in range(num_steps):
        action = env.action_space.sample()
        obs, rew, done, _info = env.step(action)
        # Without this check the test could only fail by crashing.
        assert obs[-1] == rew, 'Previous reward is not part of observation'
        if verbose:
            print(obs)
            print(rew)
            print('--------')
        if done:
            # Start a new episode so the remaining steps can still run.
            env.reset()