Example #1
def test_game_plays():
    a1 = RandomAgent()
    a2 = RandomAgent()

    g = Game(a1, a2)
    winner = g.play(3)
    assert a1 is winner or a2 is winner
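
The RandomAgent class itself never appears on this page; a minimal sketch of the interface this first test relies on (a no-argument constructor plus a uniformly random choice among whatever legal moves the game offers) could look like the code below. The choose_move name and its argument are illustrative assumptions, not the API of any particular project shown here.

import random

class RandomAgent:
    # Hypothetical minimal agent: no state, no learning, just a uniform
    # random pick among the legal moves it is handed.
    def choose_move(self, legal_moves):
        return random.choice(legal_moves)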
Example #2
    def move(self, board, settings):
        # These are parallel arrays connecting each candidate move (square) to win/loss/draw counts
        openSquares = board.getOpenSquares()
        estimatedWins = []
        estimatedLosses = []
        estimatedDraws = []

        # Create the random agents that will play to estimate board state values
        myAgent = RandomAgent(self.type, self.m, self.n, self.k)
        oppType = Square.O_HAS
        if oppType == self.type:
            oppType = Square.X_HAS
        oppAgent = RandomAgent(oppType, self.m, self.n, self.k)

        # Copy the settings and make sure the new one is not set to verbose (too much output)
        newSettings = deepcopy(settings)
        newSettings.verbose = False
        # for each possible move, play a bunch of games to estimate its value
        for x,y in openSquares:
            wins = 0.0
            losses = 0.0
            draws = 0.0
            for i in range(settings.numGamesToEstimateValue):
                # copy the "current" board and make the move we are considering
                newBoard = deepcopy(board)
                newBoard.addPiece(x, y, self.type)

                # play a random game from here
                winner = mnkgame.MNKGame().playGame(newBoard, oppAgent, myAgent, newSettings)

                # do bookkeeping based on who won
                if winner == myAgent:
                    wins += 1.0
                elif winner == oppAgent:
                    losses += 1.0
                else:
                    draws += 1.0

            # Append the results to the parallel arrays to unpack later.
            estimatedWins.append(wins)
            estimatedLosses.append(losses)
            estimatedDraws.append(draws)

        # compute a scoring function for each move based on wins/losses/draws
        moveScores = [W * 2 - L + D for W,L,D in zip(estimatedWins, estimatedLosses, estimatedDraws)]

        '''
        if settings.verbose:
            print("*** Simulation complete, results: (square, wins, losses, draws, SCORE)")
            for square, wins, losses, draws, score in zip(openSquares, estimatedWins, estimatedLosses, estimatedDraws, moveScores):
                 print(square, wins, losses, draws, score)
        '''

        # select the move with the best score
        return openSquares[np.argmax(moveScores)]
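
For intuition about the scoring rule above, 2*W - L + D rewards a win twice as much as a draw and penalizes a loss; np.argmax then returns the index of the best-scoring square. A standalone check with made-up counts:

import numpy as np

# Hypothetical simulation counts for three candidate squares.
openSquares = [(0, 0), (1, 2), (2, 1)]
estimatedWins = [3.0, 5.0, 4.0]
estimatedLosses = [1.0, 0.0, 2.0]
estimatedDraws = [1.0, 0.0, 0.0]

# Same scoring rule as in move() above: 2*W - L + D per square.
moveScores = [W * 2 - L + D for W, L, D in
              zip(estimatedWins, estimatedLosses, estimatedDraws)]

print(moveScores)                          # [6.0, 10.0, 6.0]
print(openSquares[np.argmax(moveScores)])  # (1, 2)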
Example #3
def create_agent(name, model):
    """
    Create a specific type of Snake AI agent.

    Args:
        name (str): key identifying the agent type.
        model: (optional) a pre-trained model required by certain agents.

    Returns:
        An instance of Snake agent.
    """

    from agent import DeepQNetworkAgent, PlayerAgent, RandomAgent

    if name == 'human':
        return PlayerAgent()
    elif name == 'dqn':
        if model is None:
            raise ValueError('A model file is required for a DQN agent.')
        return DeepQNetworkAgent(model=model,
                                 memory_size=-1,
                                 num_last_frames=4)
    elif name == 'random':
        return RandomAgent()

    raise KeyError('Unknown agent type: "{}"'.format(name))
Example #4
def get_agent(name, properties):
    if name == "random":
        return RandomAgent(world)
    if name == "alphabeta":
        return AlphaBetaAgent(world, properties)
    if name == "human":
        return HumanAgent(world)
    if name == "dls":
        return DLS(world)
Example #5
def build_agent(name):
    if name == "Random":
        return RandomAgent()
    elif name == "MCTS":
        return MCTSAgent()
    elif name == "AlphaZero":
        config = AlphaZeroConfig()
        network = AlphaZeroNetwork(config)
        agent = AlphaZeroAgent(config, network)
        agent.load_model("./pretrained/alphazero_model.pth")
        return agent
    else:
        raise RuntimeError('Unknown agent name: {}'.format(name))
Example #6
def main(env_name, render=False):
    env = gym.make(env_name)
    # Initialize your agent here
    agent = RandomAgent(env)
    for episode_i in range(100000):
        state = env.reset()
        done = False
        while not done:
            if render and episode_i % 10 == 0:
                env.render()
            # Your agent's action here
            action = agent.act(state)
            state, reward, done, info = env.step(action)
Example #7
    def __init__(self):
        # Board parameters
        self.m = 7
        self.n = 6
        self.k = 5

        # Agent parameters.  Specify different agent types here via constructor
        self.Xagent = RandomAgent(Square.X_HAS, self.m, self.n, self.k)
        self.Oagent = cnnAgent(Square.O_HAS, self.m, self.n, self.k)
        self.numGamesToEstimateValue = 5

        # Outermost loop parameters
        self.numGamesToTest = 10
        self.numGamesToTrain = 10
        self.verbose = False
Example #8
    def execute(self):
        for k in np.arange(self.max_runs):
            environment = KArmedTestbed(self.number_of_arms)
            agent = RandomAgent(self.number_of_arms)

            for i in np.arange(self.max_steps):
                current_state = environment.get_current_state()
                next_action = agent.get_action(current_state)
                reward = environment.do_action(next_action)

                if not self.avg_reward[i]:
                    self.avg_reward[i] = reward
                else:
                    self.avg_reward[i] = self.avg_reward[i] + (
                        reward - self.avg_reward[i]) / (i + 1.0)
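
The reward bookkeeping above follows the shape of the incremental-mean update. As a standalone reminder (with made-up rewards, and a counter n equal to the number of samples averaged so far), avg + (x - avg) / n reproduces the batch mean exactly:

rewards = [1.0, 0.0, 2.0, 5.0]  # made-up rewards

avg = 0.0
for n, x in enumerate(rewards, start=1):
    avg += (x - avg) / n  # incremental mean over the n samples seen so far

assert abs(avg - sum(rewards) / len(rewards)) < 1e-12
print(avg)  # 2.0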
Example #9
def train(env, model, args):
    model.optim = torch.optim.Adam(islice(model.parameters(), 20), lr=0.0005)
    model.pos_optim = torch.optim.Adam(model.eval_mlp.parameters(), lr=0.0005)
    replay_buffer = MemoryBuffer(int(args.batch))

    agent = RandomAgent(env.action_spec())
    max_steps = args.num_steps
    env.reset()

    step = 0
    sub_trajectory = SubTrajectory(100)

    pbar = tqdm(total = max_steps)

    while step < max_steps:
        action = agent.step()
        # for _ in range(np.random.randint(1,5)):
        rgb, pos, orientation = env.observations()['RGB'], env.observations()['DEBUG.POS.TRANS'], env.observations()['DEBUG.POS.ROT']
        reward = env.step(action)
        if (not env.is_running()):
            env.reset()
        else:
            new_rgb, new_pos, new_orientation = env.observations()['RGB'], env.observations()['DEBUG.POS.TRANS'], env.observations()['DEBUG.POS.ROT']
        
        if sub_trajectory.len == 100:
            tmp = copy.deepcopy(sub_trajectory)
            # Send initial belief to replay buffer
            o_0 = torch.from_numpy(tmp.new_rgb[0]).to(dtype=torch.float32).unsqueeze(0).to(device)
            a_0 = torch.from_numpy(tmp.action[0]).to(dtype=torch.float32).unsqueeze(0).to(device)
            z_0 = model.conv(o_0)
            bgru_input = torch.cat((z_0, a_0), dim=1)
            _, tmp.belief = model.belief_gru.gru1(torch.unsqueeze(bgru_input, 1))
            replay_buffer.add(tmp)
            sub_trajectory.clear()

        sub_trajectory.add(rgb, pos, orientation, action, new_rgb, new_pos, new_orientation)

        # Train using replay_buffer
        if step >= args.batch * 100:
            train_batch = replay_buffer.sample(64)
            if None in train_batch:
                raise Exception("Training batch contains None object")
            model.update(train_batch)

        step += 1
        pbar.update(1)
    
    pbar.close()
Example #10
 def __init__(self, agent_type):
     # TODO:: Add agent selector
     if agent_type == 'random':
         self.agent = RandomAgent()
     elif agent_type == 'fixed-action':
         self.agent = FixedActionAgent()
     elif agent_type == 'a2c':
         self.agent = A2CAgent(num_steps=50, max_frames=1000)
     elif agent_type == 'a3c':
         self.agent = A3CAgent(num_envs=1, num_steps=50, max_frames=1000)
     else:
         status = {
             'status': 'ERROR',
             'error_msg': 'Not supported agent-type'
         }
         raise Exception(status)
Example #11
def markovDecision(layout, circle):
    env = SnakesAndLadder(layout, circle)
    agent = RandomAgent(env.action_space)

    n_episodes = 50

    for episode in range(n_episodes):
        state = env.reset()
        done = False
        while not done:
            action = agent.select_action(state)
            next_state, reward, done = env.step(action)

            agent.update(state, action, reward, next_state)

            state = next_state
Example #12
def simulate_random_agent(games):
    data = {
        "actions": [],
        "rewards": np.zeros(NUM_STEPS)
    }
    for g in tqdm(range(NUM_GAMES), desc="Random Agent"):
        agent = RandomAgent(NUM_ARMS, NUM_STEPS)
        game = games[g]

        actions, rewards = agent.play(game)

        data["actions"].extend(actions)
        data["rewards"] += rewards

    # Convert sum to average reward per step.
    data["rewards"] /= NUM_GAMES

    return data
Example #13
    def __init__(self, token, agent_type):
        self.token = token
        self.remote_base = "http://grader.crowdai.org:1729"
        self.client = Client(self.remote_base)

        # TODO:: Add agent selector
        if agent_type == 'random':
            self.agent = RandomAgent()
        elif agent_type == 'fixed-action':
            self.agent = FixedActionAgent()
        elif agent_type == 'a3c':
            self.agent = A3CAgent(num_envs=2, num_steps=50, max_frames=1000)
        else:
            status = {
                'status': 'ERROR',
                'error_msg': 'Not supported agent-type'
            }
            raise Exception(status)
Example #14
def test_random():
    agent = RandomAgent()
    env = Atari()

    for i_episode in range(10):
        episode_reward = 0
        state = env.reset()

        for _ in range(10000):
            env.env.render()
            action = agent.evaluate_action(state)
            next_state, reward, done, _ = env.step(action.item())
            state = next_state
            episode_reward += reward

            if done:
                print(
                    "Episode: %6d, interaction_steps: %6d, reward: %2d, epsilon: %f"
                    %
                    (i_episode, agent.interaction_steps, episode_reward, 1.0))
                break
Example #15
def plot_cost(layout, circle, legend=True, title=None):
    agents = [
        OptimalAgent(layout, circle),
        ConstantAgent(SECURITY_DICE),
        ConstantAgent(NORMAL_DICE),
        ConstantAgent(RISKY_DICE),
        RandomAgent([SECURITY_DICE, NORMAL_DICE, RISKY_DICE])
    ]
    labels = [
        "Optimal", "Security dice", "Normal dice", "Risky dice", "Random"
    ]
    costs = [
        estimate_cost(layout, circle, pi, n_episodes=1000) for pi in agents
    ]
    states = np.arange(14) + 1

    plt.figure(figsize=(6, 4), dpi=120)
    ax = plt.gca()

    C_th, _ = markovDecision(layout, circle)
    plt.plot(states, C_th, ls='--', color=colors[0], lw=1.8)

    for i, C in enumerate(costs):
        plt.plot(states,
                 C,
                 marker="o",
                 color=colors[i],
                 lw=1.8,
                 label=labels[i])

    plt.xlabel("States")
    plt.ylabel("Total expected cost")
    if legend:
        plt.legend()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    plt.title(title)
    plt.show()
Example #16
def main(view=False, result=False):
    player1 = Human()
    player2 = RandomAgent()

    env = TicTocToe()
    state = env.setGame()
    done = False
    turn = FIRST

    try:
        while not done:
            if view:
                viewBoard(state)
            if turn == FIRST:
                action = player1.action(state)
            else:
                action = player2.action(state)

            state, info = env.step(divmod(action, 3))
            done = (info == ENDGAME)
            turn *= -1
            time.sleep(0.1)

        if env.winner:
            name = "player1" if env.winner == FIRST else "player2"
            print(name + " win!")
        else:
            print("引き分けです")
        if result:
            print("--- result ----------------")
            state.show()

    except KeyboardInterrupt:
        print("終了します。")
    except:
        import traceback
        traceback.print_exc()
        name = "player1" if turn != FIRST else "player2"
        print(name + " win!")
Example #17
 def __init__(self, mode: str):
     super().__init__()
     self.state = np.full((rows, cols), 0)
     self.blocks = np.full((rows, cols), None)
     self.cr = -1
     self.cc = -1
     self.mode = mode
     self.player = 1
     if randint(0, 1) == 0:
         self.player = -1
     self.takeInput = True
     if mode == Game.HUMAN_MODE and self.player == 1:
         self.takeInput = False
     self.ai_agent = None
     self.random_agent = None
     #print(self.mode)
     if not (self.mode == Game.HUMAN_HUMAN_MODE):
         self.ai_agent = Agent(rows, cols)
     if self.mode == Game.AI_MODE:
         print("init")
         self.random_agent = RandomAgent(rows, cols)
         print("inited")
     self.initUI()
Example #18
    def __init__(self, n_players, agents=None, card_set='random', verbose=False):
        '''Initialize a new game, with n players.

        Args:
            n_players (int): Number of players in this game. Must be
                between 2 and 4.
            agents (dict): Contains the agents who will play in this
                game. The dict key is used as the player_id. If n_players is
                greater than the number of agents provided, RandomAgents
                will be used for the remaining players.
            card_set (str): Indicates which pre-specified card set to
                use. Options are 'random' and 'base'. Default: 'random'.
            verbose (bool): Indicates whether to print game state as
                actions take place. Default: False.
        '''
        if agents is None:
            agents = {}

        assert n_players >= 2 and n_players <= 4, "n_players must be between 2 and 4"
        assert len(agents) <= n_players, "must not have more agents than n_players"
        self.n_players = n_players
        self.card_set = card_set
        self.verbose = verbose

        players = []
        for player_id, agent in six.iteritems(agents):
            players.append(Player(player_id=player_id, agent=agent))

        for n in range(len(agents), self.n_players):
            players.append(Player(player_id='Player ' + str(n), agent=RandomAgent()))
        self.players = players

        self.supply_piles = SupplyPiles(n_players=self.n_players,
                                        card_set=self.card_set)
        if self.verbose:
            self.supply_piles.display_supply_pile_count()
Example #19
import numpy as np
import gym
from agent import RandomAgent
from benchmarker import GameBenchmarker

if __name__ == '__main__':
    env = gym.make("Pong-v0")
    num_actions = env.action_space.n
    agent = RandomAgent(num_actions)
    gb = GameBenchmarker(env, 10, render=True)
    mean_rew = gb.benchmark_agent(agent)
    print(mean_rew)
Example #20
def main(args):
    parser = ArgumentParser(description='')
    
    parser.add_argument('--height', type=int, required=True,
                        help='Height of the map')
    parser.add_argument('--width', type=int, required=True,
                        help='Width of the map')
    parser.add_argument('--num-powerups', type=int, required=True,
                        help='Number of powerups to put in the game map')
    parser.add_argument('--num-monsters', type=int, required=True,
                        help='Number of monsters to put in the game map')
    parser.add_argument('--initial-strength', default=100, type=int,
                        help='Initial strength of each agent')
    parser.add_argument('--save-dir', type=str,
                        help='Save directory for saving the map')
    parser.add_argument('--map-file', type=str,
                        help='Path to the map JSON file')
    parser.add_argument('--play-against-human', action='store_true',
                        help='Whether to have a Human player as one of the '
                        'agents in the game')
    parser.add_argument('--play-against-seekers', action='store_true',
                        help='Whether to have a Seeker player as one of the '
                        'agents in the game')
    parser.add_argument('--show-map', action='store_true',
                        help='Whether to display the map in the terminal')
    parser.add_argument('--map-type', choices=MAP_TYPES, default='ascii',
                        help='Select map type. Choices are {' +
                        ', '.join(MAP_TYPES) + '}')
    parser.add_argument('--verbose', action='store_true',
                        help='Whether to be verbose when playing game')

    args = parser.parse_args(args)

    # TODO: Change how agents are populated
    agent = RandomAgent(args.height, args.width, args.initial_strength)

    agents = [agent]
    if args.play_against_human:
        human = HumanAgent(args.height, args.width, args.initial_strength)
        agents.append(human)

    # Play against seekers
    if args.play_against_seekers:
        seeker = SeekerAgent(args.height, args.width, args.initial_strength)
        agents.append(seeker)

    game_driver = GameDriver(
        height=args.height, width=args.width,
        num_powerups=args.num_powerups,
        num_monsters=args.num_monsters,
        agents=agents,
        initial_strength=args.initial_strength,
        show_map=args.show_map, map_type=args.map_type,
        save_dir=args.save_dir, map_file=args.map_file)

    print('Starting game')
    game_driver.play(verbose=args.verbose)
Example #21
import time
import gym

from agent import RandomAgent

env = gym.make("CartPole-v1")

agent = RandomAgent(env.action_space)

episode_count = 10
reward = 0
done = False

for i in range(episode_count):
    ob = env.reset()
    while True:
        action = agent.act(ob, reward, done)
        ob, reward, done, info = env.step(action)
        if done:
            print("Game Finished!")
            break
        env.render()
        time.sleep(1 / 30)
env.close()
Example #22
def play_random(params_path):
    Parameters.load(params_path)
    environment = Environment()
    agent = RandomAgent(environment)
    all_scores = agent.play()
    print('mean: ', np.mean(all_scores), '\tstd: ', np.std(all_scores))
Example #23
    parser.add_argument('--gamma',
                        default=0.99,
                        type=float,
                        help='discount factor')
    args = parser.parse_args()

    # set seed
    random.seed(args.seed)
    np.random.seed(args.seed)

    env = MazeEnv(args, args.game_name, args.graph_param, args.game_len,
                  args.gamma)

    # agent
    if args.agent == 'random':
        agent = RandomAgent(args, env)

    NUM_GRAPH = 100
    NUM_ITER = 32
    ep_rews = []
    for graph_id in range(NUM_GRAPH):
        for _ in range(NUM_ITER):
            ep_rew = 0
            state, info = env.reset(graph_index=graph_id)
            done = False
            while not done:
                action = agent.act(state)
                state, rew, done, info = env.step(action)
                ep_rew += rew
            ep_rews.append(ep_rew)
Example #24
    batch_size = 32
    num_steps = 2
#    ##### Initialization for learning Agent
#    env = environment.NYCTaxiEnv(data_months=data_months)
#    brain = LearningAgent(gamma, epsilon, alpha, maxMemorySize, epsEnd,
#                          action_space, replace)
#    start = time.time()
#    initialize(env, brain)
#    end = time.time()
#    print('%d seconds passed for initialization' %(end - start))
#    pickle.dump(brain, open('brain_init.pickle','wb'))
#    ##### Training Learning Agent
#
#    scores,steps,epsHistory = train_agent(env, brain, numGames, batch_size, num_steps)
#    brain.saveModelCheckpoint('./../output/')
#    brain.saveModelState('./../output/')
#    filename = str(numGames) + 'Games' + 'Gamma' + str(brain.GAMMA) + \
#               'Alpha' + str(brain.ALPHA) + 'Memory' + str(brain.memSize)
#    df = pd.DataFrame({'scores':scores,'steps':steps,'epsHistory':epsHistory})
#    df.to_csv(output_dir + 'LearningAgent' + filename + '.csv', index=False)
    
    ##### Random agent
    env = environment.NYCTaxiEnv(data_months=data_months)
    randombrain = RandomAgent(maxMemorySize=1, action_space=action_space)
    numGames = 100
    scores,steps = test_agent(env, randombrain, numGames)
    fileName = str(numGames) + 'Games'
    df = pd.DataFrame({'scores':scores,'steps':steps})
    df.to_csv(output_dir + 'RandomAgent' + fileName + '.csv', index=False)
    
    
Example #25
def game_mngr():
    """
  Game manager, used for navigation among different choices 
  offered to user.
  """

    # Options
    command = options('PLAY', 'RULES', 'Tap 1 to play or 2 to read the rules')

    # Rules page
    if int(command) == 2:
        print_rules()
        # Go back
        print('Tap 1 to come back to the main menu\n')
        comeback = tap_valid_digits([1])
        if int(comeback):
            game_mngr()

    # Game page
    if int(command) == 1:
        # Options
        players = options('PLAYER',
                          'PLAYERS',
                          'How many players ?',
                          comeback=True)

        # Go back
        if int(players) == 0:
            game_mngr()

        # 2 players
        if int(players) == 2:

            # Ask players' name
            player1, player2 = input_names(n_players=2)

            # Init scores
            scores = [0, 0]

            # Games
            tapnswap = TapnSwap()
            over = False
            while not over:
                game_over, winner = game_1vs1(tapnswap, player1, player2)
                scores[winner] += 1
                if game_over:
                    # Display scores
                    restart = display_endgame(scores, player1, player2)
                    # Go back
                    if not restart:
                        over = True
                        game_mngr()

        # 1 player
        if int(players) == 1:

            # Options
            level = options('EASY',
                            'DIFFICULT',
                            'Which level ?',
                            comeback=True)

            # Go back
            if int(level) == 0:
                game_mngr()

            # Define agent
            elif int(level) == 1:
                agent = RandomAgent()  # easy
            else:
                # Load agent
                agent = RLAgent()
                agent.load_model('greedy0_2_vsRandomvsSelf')  # difficult

            # Ask player's name
            player = input_names(n_players=1)

            # Init scores
            scores = [0, 0]

            # Games
            tapnswap = TapnSwap()
            over = False
            while not over:
                game_over, winner = game_1vsAgent(tapnswap,
                                                  player,
                                                  agent,
                                                  greedy=False)
                scores[winner] += 1
                if game_over:
                    # Display scores
                    restart = display_endgame(scores, player, 'Computer')
                    # Go back
                    if not restart:
                        over = True
                        game_mngr()
Example #26
from othello import Othello
from agent import Agent, RandomAgent, GreedyAgent, DullAgent, Human
from MCTS import MCTSAgent

if __name__ == '__main__':
    agent1 = MCTSAgent(1000)
    agents = [RandomAgent(), GreedyAgent(), DullAgent(), MCTSAgent(200)]

    for agent2 in agents:
        print('========')
        print(agent1, 'vs', agent2)
        black_win = 0
        white_win = 0
        tie = 0
        for _ in range(100):
            game = Othello(agent1, agent2, print_mode=False)
            winner = game.play()
            if winner == 1:
                black_win += 1
            elif winner == -1:
                white_win += 1
            elif winner == 0:
                tie += 1
        print(agent1, black_win)
        print(agent2, white_win)
        print('Tie', tie)
        print()
Example #27
def challenger_round():
    challengers = []
    leaders = []
    leader_checkpoints = os.listdir(LEADER_DIR)
    # Need to share the same schedule with all challengers, so they all anneal
    # at same rate
    epsilon_schedule = LinearSchedule(EPS_START, EPS_END, TRAIN_FRAMES)
    for i in xrange(NUM_LEADERS):
        challenger = try_gpu(
            DQNAgent(6,
                     epsilon_schedule,
                     OBSERVATION_MODE,
                     lr=LR,
                     max_grad_norm=GRAD_CLIP_NORM))
        if i < len(leader_checkpoints):
            leader = try_gpu(
                DQNAgent(6, LinearSchedule(0.1, 0.1, 500000),
                         OBSERVATION_MODE))
            leader_path = os.path.join(LEADER_DIR, leader_checkpoints[i])
            print "LOADING CHECKPOINT: {}".format(leader_path)
            challenger.load_state_dict(
                torch.load(leader_path,
                           map_location=lambda storage, loc: storage))
            leader.load_state_dict(
                torch.load(leader_path,
                           map_location=lambda storage, loc: storage))
        else:
            leader = RandomAgent(6)
            print "INITIALIZING NEW CHALLENGER AND LEADER"
        challengers.append(challenger)
        leaders.append(leader)

    if CHALLENGER_DIR is not None:
        challengers = []
        # Load in all of the leaders
        for checkpoint in os.listdir(CHALLENGER_DIR):
            path = os.path.join(CHALLENGER_DIR, checkpoint)
            print "LOADING FROM CHALLENGER_DIR: {}".format(path)
            challenger = try_gpu(
                DQNAgent(6,
                         LinearSchedule(0.05, 0.05, 1),
                         CHALLENGER_OBSERVATION_MODE,
                         lr=LR,
                         max_grad_norm=GRAD_CLIP_NORM,
                         name=checkpoint))
            challenger.load_state_dict(
                torch.load(path, map_location=lambda storage, loc: storage))
            challengers.append(challenger)

    challenger = EnsembleDQNAgent(challengers)
    leader = EnsembleDQNAgent(leaders)
    if OPPONENT is not None or HUMAN:
        leader = NoOpAgent()
    replay_buffer = ReplayBuffer(1000000)
    rewards = collections.deque(maxlen=1000)
    frames = 0  # number of training frames seen
    episodes = 0  # number of training episodes that have been played
    with tqdm(total=TRAIN_FRAMES) as progress:
        # Each loop completes a single episode
        while frames < TRAIN_FRAMES:
            states = env.reset()
            challenger.reset()
            leader.reset()
            episode_reward = 0.
            episode_frames = 0
            # Each loop completes a single step, duplicates _evaluate() to
            # update at the appropriate frame #s
            for _ in xrange(MAX_EPISODE_LENGTH):
                frames += 1
                episode_frames += 1
                action1 = challenger.act(states[0])
                action2 = leader.act(states[1])
                next_states, reward, done = env.step(action1, action2)
                episode_reward += reward

                # NOTE: state and next_state are LazyFrames and must be
                # converted to np.arrays
                replay_buffer.add(
                    Experience(states[0], action1._action_index, reward,
                               next_states[0], done))
                states = next_states

                if len(replay_buffer) > 50000 and \
                        frames % 4 == 0:
                    experiences = replay_buffer.sample(32)
                    challenger.update_from_experiences(experiences)

                if frames % 10000 == 0:
                    challenger.sync_target()

                if frames % SAVE_FREQ == 0:
                    # TODO: Don't access internals
                    for agent in challenger._agents:
                        path = os.path.join(LEADER_DIR,
                                            agent.name + "-{}".format(frames))
                        print "SAVING CHECKPOINT TO: {}".format(path)
                        torch.save(agent.state_dict(), path)
                    #path = os.path.join(
                    #        LEADER_DIR, challenger.name + "-{}".format(frames))
                    #torch.save(challenger.state_dict(), path)

                if frames >= TRAIN_FRAMES:
                    break

                if done:
                    break

            if episodes % 300 == 0:
                print "Evaluation: {}".format(
                    evaluate(challenger, leader, EPISODES_EVALUATE_TRAIN))
            print "Episode reward: {}".format(episode_reward)
            episodes += 1
            rewards.append(episode_reward)
            stats = challenger.stats
            stats["Avg Episode Reward"] = float(sum(rewards)) / len(rewards)
            stats["Num Episodes"] = episodes
            stats["Replay Buffer Size"] = len(replay_buffer)
            progress.set_postfix(stats, refresh=False)
            progress.update(episode_frames)
            episode_frames = 0
Example #28
def trainAndTestCNNAgent(settings):
    ourHero = cnnAgent(Square.X_HAS, settings.m, settings.n, settings.k)
    trainingPartner = SearchAgent(Square.O_HAS, settings.m, settings.n,
                                  settings.k)

    gauntlet1 = RandomAgent(Square.O_HAS, settings.m, settings.n, settings.k)
    gauntlet2 = SearchAgent(
        Square.O_HAS, settings.m, settings.n, settings.k
    )  # FIXME but add more search (settings specifying that parameter globally is bad)
    theGauntlet = [gauntlet1, gauntlet2]

    trainingSessions = 100
    print("****Testing ", settings.numGamesToTest,
          " games of agents looking for sequences of length k=", settings.k,
          " using ", settings.numGamesToEstimateValue,
          " games to estimate value")
    print("Agents: X: ", ourHero)
    print(" and O: ", trainingPartner)

    print(
        "\tTestSession\tHeroWins\tHeroLoses\tHeroDraws\tAvgIllegalMoves\tAvgGameLength\tMaxGameLength\tOpponent\t(REPEAT)"
    )

    for session in range(trainingSessions):
        for i in range(settings.numGamesToTrain):
            # new game, create a fresh board
            if settings.verbose:
                print("Creating a M x N board, where m =", settings.m,
                      " and n=", settings.n, "\n")
            board = Board(settings.m, settings.n)

            # play the game, taking turns being first to act
            if i % 2 == 0:
                MNKGame().playGame(board, ourHero, trainingPartner, settings)
            else:
                MNKGame().playGame(board, trainingPartner, ourHero, settings)

        # test vs the gauntlet
        for opponent in theGauntlet:
            heroWins = 0
            heroLoses = 0
            heroDraws = 0
            totalIllegalMoves = 0
            maxGameLength = 0
            totalGameLength = 0
            for j in range(settings.numGamesToTest):
                # new game, create a fresh board
                if settings.verbose:
                    print("Creating a M x N board, where m =", settings.m,
                          " and n=", settings.n, "\n")
                board = Board(settings.m, settings.n)

                # play the game, taking turns being first to act
                if j % 2 == 0:
                    winner, moveCount, illegalMoveCount = MNKGame().playGame(
                        board, ourHero, opponent, settings)

                else:
                    winner, moveCount, illegalMoveCount = MNKGame().playGame(
                        board, opponent, ourHero, settings)

                # do the bookkeeping now that a result is obtained
                totalIllegalMoves += illegalMoveCount
                totalGameLength += moveCount
                if moveCount > maxGameLength:
                    maxGameLength = moveCount

                if winner == ourHero:
                    heroWins += 1
                    if settings.verbose:
                        print(
                            "X emerges victorious over the vile O!!!!! in game ",
                            i)
                elif winner is not None:
                    heroLoses += 1
                    if settings.verbose:
                        print("O has defeated the disgusting X!!!!! in game ",
                              i)
                elif winner is None:
                    heroDraws += 1
                    if settings.verbose:
                        print("fought to a draw... maybe next time. In game ",
                              i)

            # All games vs this opponent complete, generate some final output

            print("\t",
                  session,
                  "\t",
                  heroWins,
                  "\t",
                  heroLoses,
                  "\t",
                  heroDraws,
                  "\t",
                  float(totalIllegalMoves) / settings.numGamesToTest,
                  "\t",
                  float(totalGameLength) / settings.numGamesToTest,
                  "\t",
                  maxGameLength,
                  "\t",
                  opponent,
                  end='')
        print("")
Example #29
def game_2Agents(agent1,
                 agent2,
                 start_idx=-1,
                 train=True,
                 time_limit=None,
                 n_games_test=0,
                 play_checkpoint_usr=False,
                 verbose=False):
    """
  Manages a game between 2 agents (agent1, agent2) potentially 
  time-limited, with possibility to train them, to confront 1 of 
  them through a game with user before the game between the 2 agents 
  (agent1, agent2) and to test 1 of them through several games 
  against a Random Agent after the game between the 2 agents 
  (agent1, agent2).

  Parameters
  ----------
  agent1, agent2: instances of Agent
    Agents involved in the game.
  start_idx: -1, 0 or 1
    Index of the agent that starts the game 
    (0: agent1, 1: agent2, -1: random).
  train: boolean
    Set to True to train both agents.
  time_limit: int
    Maximum number of rounds between the 2 agents 
    (possibility of loops with optimal actions).
    Avoid setting it to 0 when the 2 agents are identical.
  n_games_test: int
    Number of games between agent1 and a Random Agent following
    the game between agent1 and agent2.
  play_checkpoint_usr: boolean
    Set to True for a game between the user and agent1 
    preceding the game between agent1 and agent2.
  verbose: boolean
    Set to True for a written explanation of each round.

  Return
  ------
  game_over: boolean.
  winner: index of winner agent (0: agent1, 1: agent2, -1: tie).
  test_results: list of int
    Only meaningful if n_games_test > 0 (otherwise empty list
    by default). If n_games_test > 0:
    * test_results[0]: number of finished games.
    * test_results[1]: number of games = n_games_test.
    * test_results[2]: score of agent1.
    * test_results[3]: score of Random Agent.
  """

    tapnswap = TapnSwap()
    tapnswap.reset()

    # Time of pause between several actions (if verbose)
    delay = 2

    # Preliminary game with the user
    if play_checkpoint_usr:
        game_1vsAgent(tapnswap, 'test player', agent1, greedy=False)
        tapnswap.reset()

    # Select starting player
    if start_idx == -1:
        np.random.seed()
        player_idx = np.random.randint(0, 2)
    else:
        assert start_idx == 0 or start_idx == 1, \
        'The starting agent index must be 0, 1 or -1.'
        player_idx = start_idx

    agents = [agent1, agent2]
    names = ['Agent1', 'Agent2']

    count_rounds = 0
    prev_state = []
    prev_action = []

    # Start game
    game_over = False
    while not game_over:
        if verbose:
            # Print current configuration
            show_score(tapnswap, names, 1 - player_idx, invert=False)
            time.sleep(delay)

        # Get current state
        hands = tapnswap.show_hands().copy()
        state = [hands[player_idx], hands[1 - player_idx]]
        # Choose action
        actions = tapnswap.list_actions(player_idx)
        action = agents[player_idx].choose_action(state, actions, greedy=train)
        # Take action and get reward
        reward = tapnswap.take_action(player_idx, action)

        if verbose:
            # Print chosen action
            seq = str(names[player_idx])
            if action[0] == 0:
                seq = seq + str(' tapped with ' +
                                str(hands[player_idx, action[1]]) + ' on ' +
                                str(hands[1 - player_idx, action[2]]))
            else:
                new_hands = tapnswap.show_hands().copy()
                seq = seq + str(' swapped ' + str(hands[player_idx][0]) + '-' +
                                str(hands[player_idx][1]) + ' for ' +
                                str(new_hands[player_idx][0]) + '-' +
                                str(new_hands[player_idx][1]))
            print(seq)
            time.sleep(delay)
            print()
            # Print new configuration
            show_score(tapnswap, names, player_idx)
            time.sleep(delay)
            # Print corresponding reward
            print('Reward of ', names[player_idx], ' : ', reward)
            time.sleep(delay)
            print('----------------------------')

        game_over, winner = tapnswap.game_over()

        # Training
        if train:
            # Get new state
            next_hands = tapnswap.show_hands().copy()
            next_state = [next_hands[player_idx], next_hands[1 - player_idx]]
            # Train playing agent for a winning move
            if game_over:
                agents[player_idx].update_Q(state, action, reward, next_state)
            # Train waiting agent (response of the environment)
            if count_rounds:
                # New state in other's agent point of view
                inv_next_state = [
                    next_hands[1 - player_idx], next_hands[player_idx]
                ]
                # Each waiting agent receives the transition with the
                # response of the environment for the new state
                agents[1 - player_idx].update_Q(prev_state, prev_action,
                                                -reward, inv_next_state)
            # Keep in memory previous state and action
            prev_state = state
            prev_action = action

        # Avoid loops
        if time_limit is not None:
            if count_rounds > time_limit and not game_over:
                game_over = True
                winner = -1

        # Next round
        player_idx = 1 - player_idx
        count_rounds += 1

    # Test of agent1
    test_results = []
    if bool(n_games_test):
        random_agent = RandomAgent()
        test_results = compare_agents(agent1,
                                      random_agent,
                                      n_games=n_games_test,
                                      time_limit=None,
                                      verbose=False)

    return game_over, winner, test_results
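
The update_Q calls are where learning actually happens, but the agents' implementation is not part of this snippet. A minimal tabular sketch with the same update_Q(state, action, reward, next_state) signature might look as follows; the integer state/action indexing, the fixed gamma, and the count-based step size are illustrative assumptions, not the real RLAgent code:

import numpy as np

class TinyQAgent:
    # Hypothetical tabular agent; states and actions are assumed to
    # already be integer indices, unlike the real RLAgent.
    def __init__(self, n_states, n_actions, gamma=0.9):
        self.Q = np.zeros((n_states, n_actions))
        self.count_state_action = np.zeros((n_states, n_actions))
        self.gamma = gamma

    def update_Q(self, state, action, reward, next_state):
        self.count_state_action[state, action] += 1
        alpha = 1.0 / self.count_state_action[state, action]  # decaying step size
        target = reward + self.gamma * np.max(self.Q[next_state])  # one-step bootstrap
        self.Q[state, action] += alpha * (target - self.Q[state, action])

In Example #30 below, it is exactly such a Q table and state-action counter (agent1.Q and agent1.count_state_action) that get written to CSV once training ends.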
Example #30
def train(n_epochs,
          epsilon,
          gamma,
          load_model,
          filename,
          random_opponent,
          n_games_test,
          freq_test,
          n_skip_games=int(0),
          verbose=False):
    """
  Train 2 agents by making them play and learn together. Save the
  learned Q-function into a CSV file. It is possible to pit 1 of
  the agents against either the user or a Random Agent during
  training, as often as one wants. It is also possible to continue
  training an already trained model.

  Parameters
  ----------
  n_epochs: int
    Number of games used for training.
  epsilon: float (in [0,1])
    Fraction of greedy decisions during training of the 2 RL Agents.
  gamma: float (in [0,1])
    Factor of significance of first actions over last ones for the 
    2 RL Agents.
  load_model: string
    CSV filename in which is stored the learned Q-function of an 
    agent. If load_model = 'model', the function loads the model 
    './Models/model.csv'. If load_model is not None, the previous 
    parameters epsilon and gamma are used for a second training.
  filename: string
    Name of the CSV file that will store the learned Q-function 
    of one of the agents. The path to CSV file is 
    then ./Models/filename.csv. The counter of state-action
    pairs is also stored at ./Models/data/count_filename.csv for
    future training.
  random_opponent: boolean
    If set to true, the function trains 1 RL Agent by making it 
    play against a Random Agent. Otherwise, the RL agent is
    trained by playing against another version of itself.
  n_games_test: int
    Number of games one of the RL Agents plays against a Random Agent
    for testing. If set to 0, the RL Agents will not be tested by a 
    Random Agent. 
  freq_test: int
    Number of epochs after which one of the RL Agents plays n_games_test
    games against a Random Agent. If set to 1000, then every 1000 epochs of
    training one of the RL Agents is tested against a Random Agent.
    If set to 0, test occurs at the last epoch of training only.
    If set to -1, none of the agents is tested during training.
  n_skip_games: int 
    Number of epochs after which the user can choose to play 
    against one of the learning agents. If set to 1000, then every
    1000 games the user can choose to play against
    one agent. If set to 0, the user can choose to play against one
    agent at the last epoch only. If set to -1, no choice is offered 
    and the user cannot test any agent.
  verbose: boolean
    If set to True, each game action during training has a 
    written explanation.

  Return
  ------
  learning_results: list
    Only significant with n_games_test > 0 (otherwise, empty list 
    by default). List of each n_epochs // freq_test epoch test results 
    against a Random Agent. Each test result is a list: 
    [current epoch, score of RL Agent, number of finished games, 
    n_games test].
  """

    # Learning agent
    agent1 = RLAgent(epsilon, gamma)
    if load_model is not None:
        agent1.load_model(load_model)

    # Choose opponent
    if random_opponent:
        agent2 = RandomAgent()
        time_limit = None
        print('Training vs Random')
    else:
        agent2 = RLAgent(epsilon, gamma)
        if load_model is not None:
            agent2.load_model(load_model)
        time_limit = None
        print('Training vs Self')

    start_idx = 0
    scores = [0, 0]

    # If the user only confronts the agent at the last epoch
    # or if no confrontation
    if n_skip_games in [-1, 0]:
        n_skip_games = n_epochs - n_skip_games

    # Boolean for game between the user and agent1 preceding a game
    # between agent1 and agent2
    play_checkpoint_usr = False

    # If there is a test of agent1 at the last epoch only or no test
    if freq_test in [-1, 0]:
        freq_test = n_epochs - freq_test

    # Number of games between agent1 and a Random Agent for testing
    n_games_test_mem = n_games_test
    learning_results = []

    # Start training
    print('Training epoch:')
    for epoch in range(1, n_epochs + 1):

        if epoch % (n_epochs // 10) == 0:
            print(epoch, '/', n_epochs)

        #Update boolean for playing with user
        play_checkpoint_usr = bool(epoch % n_skip_games == 0)
        if play_checkpoint_usr:
            # Print training status
            print('Number of games: ', epoch)
            print('Scores: ', scores)
            # Ask user to play
            play = int(input('Play ? (1 Yes | 0 No)\n'))
            play_checkpoint_usr = bool(play)

        # Update boolean for test
        n_games_test = int(epoch % freq_test == 0) * n_games_test_mem

        # Start game
        game_over, winner, test_results = game_2Agents(
            agent1,
            agent2,
            start_idx=start_idx,
            train=True,
            time_limit=time_limit,
            n_games_test=n_games_test,
            play_checkpoint_usr=play_checkpoint_usr,
            verbose=verbose)

        assert game_over, str('Game not over but new game' +
                              ' beginning during training')

        if winner in [0, 1]:
            scores[winner] += 1

        # Save test games of agent1 against a Random Agent
        if bool(n_games_test):
            assert len(test_results) != 0, \
            'Agent1 has been tested but there is no result of that.'
            learning_results.append(
                [epoch, test_results[2], test_results[0], test_results[1]])

        # Next round
        start_idx = 1 - start_idx

    # Save Q-function of agent1
    np.savetxt(str('Models/' + filename + '.csv'), agent1.Q, delimiter=',')
    # Save stats for learning rate of agent1
    np.savetxt(str('Models/data/count_' + filename + '.csv'),
               agent1.count_state_action,
               delimiter=',')

    return learning_results
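
Given the parameter semantics documented above, a call that trains against a Random Agent for 10000 epochs, runs a 100-game test against a Random Agent every 1000 epochs, and never pauses for the user might look like this (the numbers and the filename are illustrative):

results = train(n_epochs=10000,
                epsilon=0.2,
                gamma=0.9,
                load_model=None,
                filename='rl_vs_random',
                random_opponent=True,
                n_games_test=100,
                freq_test=1000,
                n_skip_games=-1,
                verbose=False)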