Exemplo n.º 1
0
def test_singlethread():
    """Benchmark a UCT agent and a random agent on ``NSFrozenLakeEnv-v0``.

    One environment instance is created to obtain the action space. Two
    agents are then prepared together with their parameter grids (two
    settings for UCT, one for the random agent) and everything is handed to
    ``singlethread_benchmark`` with saving and verbose output switched on.
    """
    env_name = 'NSFrozenLakeEnv-v0'
    env = gym.make(env_name)
    actions = env.action_space

    agents = [uct.UCT(actions), ra.MyRandomAgent(actions)]

    uct_param_names = [
        'action_space', 'rollouts', 'horizon', 'gamma', 'ucb_constant',
        'is_model_dynamic',
    ]
    # The two UCT settings differ only in the 'rollouts' entry.
    uct_params = [
        [actions, rollouts, 100, 0.9, 6.36396103068, True]
        for rollouts in (10, 100)
    ]

    random_param_names = ['action_space']
    random_params = [[actions]]

    singlethread_benchmark(
        env_name=env_name,
        n_env=2,
        agent_name_pool=['UCT', 'RANDOM'],
        agent_pool=agents,
        param_pool=[uct_params, random_params],
        param_names_pool=[uct_param_names, random_param_names],
        n_epi=8,
        tmax=100,
        save=True,
        # One output path per agent, in the same order as the agent pool.
        paths_pool=['data/test_uct.csv', 'data/test_random.csv'],
        verbose=True,
    )
Exemplo n.º 2
0
def test():
    """Example run of ``benchmark`` on ``NSFrozenLakeEnv-v0``.

    Sets up a UCT agent (two parameter settings) and a random agent (three
    identical parameter settings), then calls ``benchmark`` with saving and
    verbose output enabled.
    """
    env_id = 'NSFrozenLakeEnv-v0'
    env = gym.make(env_id)
    actions = env.action_space

    names = ['UCT', 'RANDOM']
    agents = [uct.UCT(actions), ra.MyRandomAgent(actions)]

    param_names = [
        [
            'action_space', 'rollouts', 'horizon', 'gamma', 'ucb_constant',
            'is_model_dynamic',
        ],
        ['action_space'],
    ]
    params = [
        # UCT: same settings except for the 'rollouts' entry.
        [
            [actions, rollouts, 100, 0.9, 6.36396103068, True]
            for rollouts in (10, 100)
        ],
        # Random agent: three separate, identical parameter lists.
        [[actions] for _ in range(3)],
    ]
    csv_paths = ['uct.csv', 'random.csv']

    n_environments = 1
    n_episodes = 3
    max_timesteps = 100

    # Arguments stay positional, in the order the original call used.
    benchmark(
        env_id,
        n_environments,
        names,
        agents,
        params,
        param_names,
        n_episodes,
        max_timesteps,
        save=True,
        paths_pool=csv_paths,
        verbose=True,
    )
Exemplo n.º 3
0
import gym
import dyna_gym.envs.cartpole_dynamic_transition
import dyna_gym.agents.uct as uct

### Parameters
timesteps = 1000  # upper bound on the number of environment steps
verbose = False   # when True, the environment state is printed every step
env = gym.make('CartPoleDynamicReward-v2')
agent = uct.UCT(
    action_space=env.action_space,
    rollouts=100,
    horizon=50,
    is_model_dynamic=True,
)

### Run
env.reset()
done = False
for ts in range(timesteps):
    # The agent receives the environment and the previous step's done flag.
    action = agent.act(env, done)
    __, reward, done, __ = env.step(action)
    if verbose:
        env.print_state()
    env.render()
    elapsed = ts + 1  # 1-based count of steps taken so far
    if elapsed == timesteps:
        print("Successfully reached end of episode ({} timesteps)".format(elapsed))
    if done:
        print("Episode finished after {} timesteps".format(elapsed))
        break
import gym
import dyna_gym.envs.cartpole_dynamic_transition
import dyna_gym.agents.uct as uct

### Parameters
timesteps = 100  # upper bound on the number of environment steps
verbose = False  # when True, the environment state is printed every step
env = gym.make('CartPoleDynamicTransition-v0')
agent = uct.UCT(action_space=env.action_space, rollouts=100)

### Run
env.reset()
done = False
for ts in range(timesteps):
    # The agent receives the environment and the previous step's done flag.
    action = agent.act(env, done)
    # Only the done flag from the step result is used.
    __, __, done, __ = env.step(action)
    if verbose:
        env.print_state()
    env.render()
    elapsed = ts + 1  # 1-based count of steps taken so far
    if elapsed == timesteps:
        print("Successfully reached end of episode ({} timesteps)".format(elapsed))
    if done:
        print("Episode finished after {} timesteps".format(elapsed))
        break