import gym
import dyna_gym.agents.uct as uct
# NOTE: the next two imports are assumptions about the repository layout; adjust the
# module paths so that MyRandomAgent, benchmark and singlethread_benchmark resolve.
import dyna_gym.agents.my_random_agent as ra
from dyna_gym.utils.benchmark import benchmark, singlethread_benchmark


def test_singlethread():
    env_name = 'NSFrozenLakeEnv-v0'
    n_env = 2
    n_epi = 8
    tmax = 100
    env = gym.make(env_name)
    agent_name_pool = ['UCT', 'RANDOM']
    agent_pool = [
        uct.UCT(env.action_space),
        ra.MyRandomAgent(env.action_space)
    ]
    param_names_pool = [
        ['action_space', 'rollouts', 'horizon', 'gamma', 'ucb_constant', 'is_model_dynamic'],
        ['action_space']
    ]
    param_pool = [
        [
            [env.action_space, 10, 100, 0.9, 6.36396103068, True],
            [env.action_space, 100, 100, 0.9, 6.36396103068, True]
        ],
        [
            [env.action_space]
        ]
    ]
    paths_pool = ['data/test_uct.csv', 'data/test_random.csv']
    singlethread_benchmark(
        env_name=env_name,
        n_env=n_env,
        agent_name_pool=agent_name_pool,
        agent_pool=agent_pool,
        param_pool=param_pool,
        param_names_pool=param_names_pool,
        n_epi=n_epi,
        tmax=tmax,
        save=True,
        paths_pool=paths_pool,
        verbose=True
    )
def test():
    """ Example """
    env = gym.make('NSFrozenLakeEnv-v0')
    nenv = 1
    nepi = 3
    tmax = 100
    agent_name_pool = ['UCT', 'RANDOM']
    agent_pool = [
        uct.UCT(env.action_space),
        ra.MyRandomAgent(env.action_space)
    ]
    param_names_pool = [
        ['action_space', 'rollouts', 'horizon', 'gamma', 'ucb_constant', 'is_model_dynamic'],
        ['action_space']
    ]
    param_pool = [
        [
            [env.action_space, 10, 100, 0.9, 6.36396103068, True],
            [env.action_space, 100, 100, 0.9, 6.36396103068, True]
        ],
        [
            [env.action_space],
            [env.action_space],
            [env.action_space]
        ]
    ]
    paths_pool = ['uct.csv', 'random.csv']
    benchmark(
        'NSFrozenLakeEnv-v0',
        nenv,
        agent_name_pool,
        agent_pool,
        param_pool,
        param_names_pool,
        nepi,
        tmax,
        save=True,
        paths_pool=paths_pool,
        verbose=True
    )
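# Minimal runner sketch (an assumption, not part of the original tests): calling the two
# benchmark functions above directly, e.g. when this file is executed as a script. It
# presumes that importing the dyna_gym packages above registers 'NSFrozenLakeEnv-v0'.
if __name__ == '__main__':
    test()                 # 3 episodes, single environment instance
    test_singlethread()    # 8 episodes, 2 environment instances, results saved under data/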
import gym
import dyna_gym.envs.cartpole_dynamic_transition
import dyna_gym.agents.uct as uct

### Parameters
env = gym.make('CartPoleDynamicReward-v2')
agent = uct.UCT(
    action_space=env.action_space,
    rollouts=100,
    horizon=50,
    is_model_dynamic=True
)
timesteps = 1000
verbose = False

### Run
env.reset()
done = False
for ts in range(timesteps):
    __, reward, done, __ = env.step(agent.act(env, done))
    if verbose:
        env.print_state()
    env.render()
    if ts + 1 == timesteps:
        print("Successfully reached end of episode ({} timesteps)".format(ts + 1))
    if done:
        print("Episode finished after {} timesteps".format(ts + 1))
        break
import gym
import dyna_gym.envs.cartpole_dynamic_transition
import dyna_gym.agents.uct as uct

### Parameters
env = gym.make('CartPoleDynamicTransition-v0')
agent = uct.UCT(action_space=env.action_space, rollouts=100)
timesteps = 100
verbose = False

### Run
env.reset()
done = False
for ts in range(timesteps):
    __, __, done, __ = env.step(agent.act(env, done))
    if verbose:
        env.print_state()
    env.render()
    if ts + 1 == timesteps:
        print("Successfully reached end of episode ({} timesteps)".format(ts + 1))
    if done:
        print("Episode finished after {} timesteps".format(ts + 1))
        break
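# Baseline sketch (hypothetical addition, not in the original example): the same episode
# loop with uniformly random actions instead of UCT, as a quick sanity check. It relies
# only on the standard gym interface (action_space.sample, step, render).
import gym
import dyna_gym.envs.cartpole_dynamic_transition

env = gym.make('CartPoleDynamicTransition-v0')
env.reset()
done = False
for ts in range(100):
    __, __, done, __ = env.step(env.action_space.sample())
    env.render()
    if done:
        print("Episode finished after {} timesteps".format(ts + 1))
        break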