Exemplo n.º 1
0
# Set up both players: the opponent class is looked up by the index held in
# `selected_opponent`, and the tree search is given a GreedyAgent as its
# simulation agent.
opponent_cls = possible_opponents[int(selected_opponent)]
opponent = opponent_cls()
mcts_agent = GreedyAgent()
tree = Mcts(game, simulation_agent=mcts_agent)
# Alternative kept from the original: tree = Mcts(game)

# Turn counter; within this snippet it is only used by the disabled debug
# dump inside the loop.
count = 0

while not game.done:
    count += 1

    # Time a single search call (the meaning of the argument 30 is defined
    # by Mcts.run, which is not visible here).
    start = time()
    tree.run(30, verbose=True)
    stop = time()
    elapsed = stop - start

    ob = game._get_obs()
    # Disabled debug dump of the current state:
    # if ob is not None:
    #     game.ale.saveScreenPNG('images/' + str(count) + '-state.png')
    #     print(count, end=" ")
    #     for i, val in enumerate(ob):
    #         print(val, end=" ")
    #     print("")
    print("total time: ", elapsed)

    # Both actions are chosen from the same observation before either one
    # is applied.
    action1 = tree.predict()
    action2 = opponent.act(ob, player=1)

    # Apply each action to the game and then re-root the tree on it, first
    # for the tree's action, then for the opponent's.
    for action in (action1, action2):
        game.act(action)
        tree.move_root(action)

    game.render()
Exemplo n.º 2
0
from pong.pong_game import PongGame
from pong.gym_agents import RandomAgent

if __name__ == '__main__':
    # Demo script: two RandomAgent players act against each other in a
    # PongGame environment for `episode_count` episodes.

    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.INFO)

    env = PongGame()
    env.seed(0)
    agent1 = RandomAgent()
    agent2 = RandomAgent()

    episode_count = 1
    reward = 0
    print(env.action_space.n)

    try:
        for _ in range(episode_count):
            ob = env.reset()
            # Reset the termination flag for every episode. Previously it was
            # set to False once and never updated, so the inner loop could
            # never reach its `break`.
            done = False
            while True:
                action1 = agent1.act(ob, player=0)
                action2 = agent2.act(ob, player=1)

                # Accept both a two-value return (ob, reward) and the
                # Gym-style (ob, reward, done, info) return.
                ob, reward, *extra = env.step(action1, a2=action2)
                if extra:
                    done = bool(extra[0])
                else:
                    # NOTE(review): assumes the environment exposes a boolean
                    # `done` attribute when step() does not return one --
                    # confirm against PongGame. If the attribute is missing
                    # the flag stays False, as before.
                    done = bool(getattr(env, "done", False))

                if done:
                    break
                env.render()
    finally:
        # Always close the env, even if an episode raises.
        env.close()
Exemplo n.º 3
0
if __name__ == '__main__':
    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.INFO)

    env = PongGame()
    outdir = '/tmp/random-agent-results'
    ACTIONS = env.action_space.n

    SKIP_CONTROL = 0  # Use previous control decision SKIP_CONTROL times, that's how you
    # can test what skip is still usable.

    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)
    env.render()
    env.unwrapped.viewer.window.on_key_press = key_press
    env.unwrapped.viewer.window.on_key_release = key_release

    episode_count = 100
    reward = 0
    done = False
    print(env.action_space.n)

    for i in range(episode_count):
        ob = env.reset()
        skip = 0
        total_reward = 0
        total_timesteps = 0
        while True:
            if not skip: