Exemplo n.º 1
0
from gym import logger

from pong.pong_game import PongGame
from pong.gym_agents import RandomAgent

if __name__ == '__main__':
    # Demo script: two RandomAgent instances play Pong against each other,
    # rendering after every step that does not end the episode.

    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.INFO)

    env = PongGame()
    env.seed(0)
    agent1 = RandomAgent()
    agent2 = RandomAgent()

    episode_count = 1
    reward = 0
    done = False
    print(env.action_space.n)

    for _ in range(episode_count):
        ob = env.reset()
        while True:
            # Both players pick their move from the same observation.
            action1 = agent1.act(ob, player=0)
            action2 = agent2.act(ob, player=1)
            # Take the episode-termination flag from step(); previously
            # `done` was never refreshed, so the loop could not terminate.
            ob, reward, done, _ = env.step(action1, a2=action2)
            if done:
                break
            env.render()

    # Close the env and write monitor result info to disk
    env.close()
Exemplo n.º 2
0
from pong.gym_agents import *
from pong.monitor import PongMonitor

# Opponent agent classes selectable at the prompt, keyed by menu number.
possible_opponents = {
    1: RandomAgent,
    2: GreedyAgent,
    3: AggressiveAgent,
    4: LazyAgent,
}

print("Welcome in Pong")

# Keep prompting until exactly one token is entered and it maps to a known
# opponent; a bad entry used to abort the script with ValueError/KeyError.
while True:
    tokens = input(
        "Select opponent for MCTS (1 - Random, 2 - Safe, 3 - Aggressive, 4 - Lazy): "
    ).split()
    try:
        # Unpacking raises ValueError unless exactly one token was typed;
        # int() raises ValueError for non-numeric text; the lookup raises
        # KeyError for numbers outside the menu.
        (selected_opponent,) = tokens
        opponent_class = possible_opponents[int(selected_opponent)]
    except (ValueError, KeyError):
        print("Invalid selection, please enter one of: {}".format(
            ", ".join(str(key) for key in sorted(possible_opponents))))
        continue
    break

game = PongGame()
game = PongMonitor(game, ".", force=True)
game.reset()

opponent = opponent_class()
mcts_agent = GreedyAgent()

# The tree search is given a GreedyAgent as its simulation agent.
tree = Mcts(game, simulation_agent=mcts_agent)
# tree = Mcts(game)

# Loop-iteration counter used by the game loop that follows.
count = 0

# Main game loop: repeats until the game object reports `done`.
# NOTE(review): the loop body appears to continue beyond this excerpt.
while not game.done:
    count = count + 1  # number of loop iterations so far
    # NOTE(review): `time`, `Mcts` and `PongGame` are not imported in the
    # visible part of this script -- confirm they are in scope.
    start = time()  # timestamp taken just before the tree search starts
    # presumably 30 is the number of search iterations -- confirm against Mcts.run
    tree.run(30, verbose=True)
Exemplo n.º 3
0
from mcts.mcts import Mcts
from pong.pong_game import PongGame
from time import time
from pong.gym_agents import *
from pong.monitor import PongMonitor
from ddqn.ddqn_agent import DdqnAgent

# Evaluation script: plays 15 games between `ddqn_agent` (player 0) and
# `opponent` (player 1), rendering every step.
game = PongGame()
# game = PongMonitor(game, ".", force=True)

opponent = AggressiveAgent()
# opponent = GreedyAgent()
# NOTE(review): despite its name this is a RandomAgent, not the imported
# DdqnAgent -- confirm this is intentional.
ddqn_agent = RandomAgent()

# Module-level accumulators; not written to in the visible part of the loop
# (presumably filled once per game further down -- confirm).
_scores = []
_bounces = []

for i in range(15):
    game.reset()
    scores = [0, 0]
    prev_bounce = 0
    # Play until one entry of `scores` equals 21.
    # NOTE(review): `scores` is not updated in the visible part of the loop;
    # the update presumably follows beyond this excerpt.
    while 21 not in scores:
        # One observation is fetched per iteration and shared by both agents,
        # so the opponent decides without seeing player 0's move applied.
        ob = game._get_obs()
        game.render()
        action1 = ddqn_agent.act(ob, player=0)
        game.act(action1)

        action2 = opponent.act(ob, player=1)
        # Only the value returned by the second act() call is kept.
        reward = game.act(action2)

        # game.render()
Exemplo n.º 4
0
#         for run in [60]:
#             for parameter in [False]:
#                 for i in range(0, 15):
#                     playouts.append({
#                         'runs': run,
#                         'agent': agent,
#                         'method': method,
#                         'skip_actions': parameter,
#                         'exploration_parameter': 1.41
#                     })

# For every configured playout: announce it, derive a per-playout log path,
# wrap a fresh game in a PongMonitor and create the agents for that run.
for playout in playouts:
    opponent_name = opponent_names[playout['agent']]
    print('Playing pong with {} runs, using {} method, against {} opponent'.format(
        playout['runs'], playout['method'], opponent_name))
    game = PongGame()

    # NOTE(review): this tests 'exploration_parameter', so 'no-skip' is chosen
    # only when that value is literally False -- confirm whether a separate
    # skip flag was meant to drive this label.
    skip_label = 'no-skip' if playout['exploration_parameter'] is False else 'with-skip'
    # Layout: ./logs-defence/<method>-<skip label>-<exploration parameter>/
    #         pong-<method>-<runs>-against-<opponent>_<timestamp>
    filename = './logs-defence/{}-{}-{}/pong-{}-{}-against-{}_{}'.format(
        playout['method'],
        skip_label,
        playout['exploration_parameter'],
        playout['method'],
        playout['runs'],
        opponent_name,
        datetime.now().strftime("%Y%m%d-%H%M%S"),
    )
    print(filename)
    game = PongMonitor(game, filename, force=False)
    game.reset()

    pong_logger = PDLogger(filename)
    # Build the opponent from this playout's own 'agent' entry. The stray
    # global `agent` used before did not follow the loop variable, so the
    # opponent could differ from the one printed and logged above.
    opponent = possible_opponents[playout['agent']]()
    mcts_agent = GreedyAgent()

    # Assigned by the method-specific branches that follow.
    tree = None
    if playout['method'] == 'greedy':
Exemplo n.º 5
0
        a = 3
    if a <= 0 or a >= 6:
        return
    if human_agent_action == a:
        human_agent_action = 0


# Usage hints, printed when the module is loaded (outside the __main__ guard).
print("Press keys (arrow up) (arrow down) to take actions (2 and 3).")
print("No keys pressed is taking action 0 (stay in the same place)")

if __name__ == '__main__':
    # Keyboard-controlled session: creates the game, wraps it in a Monitor
    # and attaches the key handlers to the render window.
    # NOTE(review): the script appears to continue beyond this excerpt.

    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.INFO)

    env = PongGame()
    # Directory handed to the Monitor wrapper below.
    outdir = '/tmp/random-agent-results'
    # Number of discrete actions exposed by the environment.
    ACTIONS = env.action_space.n

    SKIP_CONTROL = 0  # Use previous control decision SKIP_CONTROL times, that's how you
    # can test what skip is still usable.

    # force=True is passed to the Monitor together with the output directory
    # (presumably allowing existing results there to be overwritten -- confirm).
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)
    # Rendered once before the handlers are attached; presumably this creates
    # the viewer window referenced on the next lines -- confirm.
    env.render()
    env.unwrapped.viewer.window.on_key_press = key_press
    env.unwrapped.viewer.window.on_key_release = key_release

    # Initial values for the episode loop state.
    episode_count = 100
    reward = 0
    done = False