Example #2
def gain_experience_against_rand_bot(bot, encoder, num_episodes):

    experience = []
    rbot = RandomBot()

    for episode_num in range(num_episodes):
        # print('\rrunning episode {0}'.format(episode_num), end='')
        episode = {'boards': [], 'moves': [], 'players': [], 'winner': 0}

        # Alternate which bot plays first from one episode to the next.
        if episode_num % 2 == 0:
            boards, moves, players, winner = simulate_game(bot, rbot, encoder)
        else:
            boards, moves, players, winner = simulate_game(rbot, bot, encoder)

        # Keep only the positions where player 1 was not the one to move;
        # filtering into new lists avoids mutating them while iterating.
        keep = [j for j, player in enumerate(players) if player != 1]
        boards = [boards[j] for j in keep]
        moves = [moves[j] for j in keep]
        players = [players[j] for j in keep]

        episode['boards'] = boards
        episode['moves'] = moves
        episode['players'] = players
        episode['winner'] = winner

        experience.append(episode)

    # print(' done')
    return experience
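
A hypothetical consumer of the experience list built above (label_experience
and the +1/-1 player/winner convention are assumptions based on the
evaluation code later in this section, not part of the source) might look
like this:

def label_experience(experience):
    # Label each recorded board with +1 if the player to move went on to
    # win that episode and -1 otherwise.
    labels = []
    for episode in experience:
        for player in episode['players']:
            labels.append(player * episode['winner'])
    return labels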
Example #3
def train(num_games: int = 100000, saved_bot_file=None):

    num_games_played = 0
    num_games_rl_bot_won = 0

    if saved_bot_file is None:
        rl_bot = RLBot('o')
    else:
        rl_bot_data = load_bot_data(saved_bot_file)
        rl_bot = RLBot(rl_bot_data["marker"], rl_bot_data["state_values"])

    while num_games_played < num_games:

        game = TicTacToe()

        rl_bot_turn = randint(0, 1)

        if rl_bot.marker == 'o':
            random_bot = RandomBot('x')
        else:
            random_bot = RandomBot('o')

        last_outcome = -1
        turn = 1

        while last_outcome not in (TicTacToe.DRAW,
                                   TicTacToe.X_WINS,
                                   TicTacToe.O_WINS):
            if turn % 2 == rl_bot_turn:
                last_outcome = rl_bot.play_move(game)
            else:
                last_outcome = random_bot.play_move(game)
            turn += 1

        rl_bot.update_end_position(last_outcome)

        num_games_played += 1

        # Credit the win according to the RL bot's own marker (it may be
        # 'x' if loaded from a saved file).
        if ((last_outcome == TicTacToe.O_WINS and rl_bot.marker == 'o')
                or (last_outcome == TicTacToe.X_WINS and rl_bot.marker == 'x')):
            num_games_rl_bot_won += 1

        print("RL Bot Winning Percentage",
              num_games_rl_bot_won / num_games_played)
        #print(len(rl_bot.state_values))
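
The value-update rule lives inside RLBot and is not shown in this snippet.
A minimal sketch of the kind of temporal-difference backup that
update_end_position() might trigger, with assumed names (state_values,
visited_states) and step size alpha, is:

def td_backup(state_values, visited_states, reward, alpha=0.1):
    # Walk the episode's states backwards, pulling each state's value
    # toward the value of its successor (TD(0), seeded with the terminal
    # reward as the first target).
    target = reward
    for state in reversed(visited_states):
        value = state_values.get(state, 0.5)
        value += alpha * (target - value)
        state_values[state] = value
        target = value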
Example #4
def evaluate_against_rand_bot(model, num_games):
    bot_rd = RandomBot()
    bot_nn = DeepLearningBot(model)
    player_nn = 1
    score = 0

    for i in range(num_games):
        # print('\rplaying game {0}'.format(i), end='')
        game = GameState.new_game()

        while game.is_not_over():
            if game.player == player_nn:
                action = bot_nn.select_move(game)
            else:
                action = bot_rd.select_move(game)

            game.take_turn_with_no_checks(action)

        # game.winner is +1 or -1, so this adds +1 when the network bot
        # wins and -1 when it loses; swap colours for the next game.
        score += player_nn * game.winner
        player_nn *= -1

    # print(' done')
    return score / num_games
Example #5
def play_game():
    game = TicTacToe()

    bot1 = RandomBot('o')

    bot2 = RandomBot('x')

    last_move = -1
    turn = 1

    while last_move not in (TicTacToe.DRAW,
                            TicTacToe.X_WINS,
                            TicTacToe.O_WINS):
        if turn % 2 == 1:
            last_move = bot1.play_move(game)
        else:
            last_move = bot2.play_move(game)
        turn += 1
        game.print_board()
Example #6
class ActorCriticBot:
    def __init__(self, model=None):

        self.encoder = FourPlaneEncoder()
        self.evaluation_history_old = []
        self.evaluation_history_ran = []
        self.rand_bot = RandomBot()
        if model:
            self.model = model
        else:
            self.model = self.init_model()

    def init_model(self):

        # Create the network
        board_input = Input(shape=(7, 7, 4), name='board_input')

        # conv1 = Conv2D(64, (3, 3),
        #                padding='same',
        #                activation='sigmoid')(board_input)

        # conv2 = Conv2D(64, (3, 3),
        #                padding='same',
        #                activation='sigmoid')(conv1)

        flat = Flatten()(board_input)
        hidden_board1 = Dense(512, activation='sigmoid')(flat)
        hidden_board2 = Dense(512, activation='sigmoid')(hidden_board1)
        processed_board = Dense(512, activation='sigmoid')(hidden_board2)

        policy_hidden = Dense(512, activation='sigmoid')(processed_board)
        policy_output = Dense(96, activation='softmax')(policy_hidden)

        value_hidden = Dense(512, activation='sigmoid')(processed_board)
        value_output = Dense(1, activation='tanh')(value_hidden)

        model = Model(inputs=board_input,
                      outputs=[policy_output, value_output])

        # Compile the model ('lr' was renamed to 'learning_rate' in newer
        # versions of Keras)
        model.compile(optimizer=SGD(learning_rate=0.001),
                      loss=['categorical_crossentropy', 'mse'],
                      loss_weights=[1.0, 1.0])

        return model

    def select_move(self, game_state):

        move_probs, move_val, input_tensor = self.predict(game_state)

        # Sharpen the distribution by cubing so stronger moves are sampled
        # far more often, then renormalise.
        move_probs = move_probs**3
        move_probs /= np.sum(move_probs)

        # Clip away exact 0 and 1 probabilities before renormalising again.
        eps = 1e-6
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs /= np.sum(move_probs)

        # Sample all 96 move indices without replacement to get a random
        # ranking of moves weighted by their probabilities.
        candidates = np.arange(96)
        ranked_moves = np.random.choice(candidates,
                                        96,
                                        replace=False,
                                        p=move_probs)

        pieces = np.sum(input_tensor[:, :, :, :2], -1).reshape(7, 7)

        for move_idx in ranked_moves:
            move = self.encoder.decode_move_index(pieces, move_idx)
            if not game_state.is_move_illegal(move):
                return Act.play(move), move_val

        return Act.pass_turn(), None

    def predict(self, game_state):
        input_tensor = self.encoder.encode(game_state).reshape(1, 7, 7, 4)
        probs, value = self.model.predict(input_tensor)
        return probs[0], value[0][0], input_tensor

    def evaluate_against_rand_bot(self, num_games):
        act_crit_player = 1
        score = 0
        num_games_won_as_black = 0
        num_games_won_as_white = 0

        for i in range(num_games):
            print('\rEvaluating against rand bot: game {0}'.format(i), end='')
            game = GameState.new_game()

            max_num_of_turns = 1000
            turns_taken = 0

            while game.is_not_over() and turns_taken < max_num_of_turns:
                if game.player == act_crit_player:
                    action, value = self.select_move(game)
                else:
                    action = self.rand_bot.select_move(game)

                game.take_turn_with_no_checks(action)
                turns_taken += 1

            if turns_taken < max_num_of_turns:
                score += act_crit_player * game.winner

                if act_crit_player == game.winner:
                    if act_crit_player == 1:
                        num_games_won_as_white += 1
                    else:
                        num_games_won_as_black += 1
            else:
                # Count games that hit the turn limit as a loss.
                score -= 1

            # Swap colours so the bot plays the other side next game.
            act_crit_player *= -1

        # Save the evaluation score of the bot along with fraction of games
        # won as black/white and the total number of games
        self.evaluation_history_ran.append([
            score / num_games, 2 * num_games_won_as_white / num_games,
            2 * num_games_won_as_black / num_games, num_games
        ])

    def evaluate_against_old_bot(self, num_games):

        model = load_model('old_actor_critic_model.h5')
        old_bot = ActorCriticBot(model)

        act_crit_player = 1
        score = 0
        num_games_won_as_black = 0
        num_games_won_as_white = 0

        for i in range(num_games):
            print('\rEvaluating against old bot: game {0}'.format(i), end='')
            game = GameState.new_game()

            max_num_of_turns = 1000
            turns_taken = 0

            while game.is_not_over() and turns_taken < max_num_of_turns:
                if game.player == act_crit_player:
                    action, value = self.select_move(game)
                else:
                    action, value = old_bot.select_move(game)

                game.take_turn_with_no_checks(action)
                turns_taken += 1

            if turns_taken < max_num_of_turns:
                score += act_crit_player * game.winner

                if act_crit_player == game.winner:
                    if act_crit_player == 1:
                        num_games_won_as_white += 1
                    else:
                        num_games_won_as_black += 1
            else:
                # Count games that hit the turn limit as a loss.
                score -= 1

            # Swap colours so the bot plays the other side next game.
            act_crit_player *= -1

        self.evaluation_history_old.append([
            score / num_games, 2 * num_games_won_as_white / num_games,
            2 * num_games_won_as_black / num_games, num_games
        ])

    def save_bot(self):
        self.model.save('actor_critic_model.h5')
        np.save('eval_history_old.npy', self.evaluation_history_old)
        np.save('eval_history_ran.npy', self.evaluation_history_ran)

    def load_bot(self):
        self.model = load_model('actor_critic_model.h5')
        self.evaluation_history_old = list(np.load('eval_history_old.npy'))
        self.evaluation_history_ran = list(np.load('eval_history_ran.npy'))

    def save_as_old_bot(self):
        self.model.save('old_actor_critic_model.h5')
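
For reference, a typical interaction with this class, using only the methods
defined above (the training step itself is not shown in this snippet):

bot = ActorCriticBot()
bot.save_as_old_bot()               # snapshot the current network
# ... train bot.model on self-play experience ...
bot.evaluate_against_old_bot(100)
bot.evaluate_against_rand_bot(100)
bot.save_bot()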
Example #7
                      str(max_nodes) + ' nodes')
            possible_plays = None
            play_values = None
        else:
            possible_plays = [child.last_play for child in self.root.children]
            play_values = [child.score for child in self.root.children]
        return possible_plays, play_values


if __name__ == '__main__':
    from piece import read_pieces
    from random_bot import RandomBot
    from game import Game
    pieces = read_pieces(PIECES_FILE)
    players = [RandomBot(i) for i in range(NUM_PLAYERS)]
    test_board = [
        [255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255],
        [0, 4, 0, 4, 4, 0, 0, 0, 4, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0],
        [0, 4, 4, 0, 4, 0, 0, 0, 4, 0, 4, 4, 0, 0, 8, 0, 0, 0, 8, 8, 8, 0],
        [0, 0, 0, 0, 4, 0, 0, 4, 4, 4, 8, 8, 0, 0, 8, 8, 0, 0, 8, 0, 0, 0],
        [0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 8, 8, 0, 8, 0, 8, 8, 0, 8, 0, 0],
        [0, 0, 4, 0, 4, 4, 0, 0, 8, 0, 0, 8, 0, 8, 0, 0, 8, 8, 0, 0, 0, 0],
        [0, 4, 4, 4, 0, 4, 0, 0, 8, 8, 8, 0, 8, 8, 0, 0, 0, 8, 0, 8, 0, 0],
        [0, 0, 0, 0, 4, 0, 0, 4, 8, 0, 0, 8, 8, 4, 0, 0, 8, 0, 8, 8, 0, 0],
        [0, 0, 1, 0, 4, 4, 4, 0, 0, 8, 4, 0, 4, 4, 4, 8, 8, 0, 8, 2, 2, 0],
        [0, 1, 1, 0, 0, 0, 4, 0, 8, 8, 4, 4, 2, 4, 0, 8, 0, 2, 2, 8, 8, 0],
        [0, 1, 1, 4, 4, 4, 0, 4, 0, 0, 0, 4, 2, 2, 0, 8, 0, 2, 0, 0, 8, 0],
        [0, 0, 0, 1, 1, 1, 0, 4, 4, 4, 4, 0, 1, 2, 2, 0, 0, 2, 2, 0, 8, 0],
        [0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 8, 0, 2, 8, 0],
        [0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 2, 2, 8, 8, 8, 0, 0],
Example #9
class MCTSBot:
    """
    This bot plays brandubh using a Monte Carlo tree search to select its
    move. An intance of this class is initialised with two parameters.
    1) num_rounds - the number of nodes to be added to the tree
    2) temperature- effects the balance between exploration and exploitation
                    when picking child nodes with uct score
    When an instance is created, and RandomBot object is also created as
    an instance variable. This is used for simulating random games
    
    Monte Carlo tree search:
        This algorithm builds a tree data structure. Each node of the tree
        represents a possible move following the move represented by the
        node's parent node. The root node of the tree is the current board
        position the bot is trying to decide a move for. 
        
        The algorithm starts with a root node and then adds child nodes to
        the rooth until all no more can be added to the root node (i.e. all
        legal moves from the current board position have been added). Once 
        all possible child nodes have been added to a node, the algorithm
        picks a child node at random (using uct score) to add a child node to.
        
        Each time a child node is added to the tree, a random game is played 
        until the a winner is decided, begining from the board position 
        represented by the new child node. The winner is saved in the node
        and passed to all parent nodes. So that each node in the tree has a
        record of how many random games the black or white player won that
        began from a board position that stemmed from that node. The ratio
        of black wins vs white wins gives a way of ranking how good a move
        is for a particular player.
        
        This continues until a given number of nodes ('num_rounds') are
        added to the tree.
        
        The child node of the root with the best ranking is selected as the
        next move
    """
    def __init__(self, num_rounds, temp):
        self.num_rounds = num_rounds
        self.temperature = temp
        self.bot = RandomBot()

    def select_move(self, game_state):
        """
        This method uses the Monte Carlo tree search to select what move
        to make next given the board position in game_state.
        """

        # Create the root of the tree.
        root = MCTSNode(game_state)

        # add num_rounds nodes to the tree.
        for i in range(self.num_rounds):
            # print('\rrunning rollout {0}'.format(i), end='')

            # To add a child node, begin at the root of the tree.
            node = root

            # While child nodes can't be added to the current node and
            # the current node doesn't represent a game state where the
            # game is over, select a child as the current node using uct
            while (not node.can_add_child()) and (not node.is_terminal()):
                if not node.children:
                    break
                node = self.select_child(node)

            # Add a random child node if possible
            if node.can_add_child():
                node = node.add_random_child()

            # Simulate a random game from the current board position, record
            # the winner and pass it back to all parent nodes
            winner = self.simulate_random_game(node.game_state)
            while node is not None:
                node.record_win(winner)
                node = node.parent

        # Once 'num_rounds' nodes have been added to the tree, select the
        # child node of the root with the best ranking as the next move
        # print('finished rollouts')
        best_move = None
        best_frac = -1
        for child in root.children:
            child_frac = child.winning_frac(game_state.player)
            if child_frac > best_frac:
                best_frac = child_frac
                best_move = child.move

        # return the best move
        if best_move is None:
            return Act.pass_turn()
        return Act.play(best_move)

    def select_child(self, node):
        """
        This method selects a child with the best uct score
        """
        total_rollouts = sum(child.num_rollouts for child in node.children)

        best_score = -1
        best_child = None

        for child in node.children:
            score = uct_score(total_rollouts, child.num_rollouts,
                              child.winning_frac(node.game_state.player),
                              self.temperature)
            if score > best_score:
                best_score = score
                best_child = child

        return best_child

    def simulate_random_game(self, game_state):
        """
        This method plays a game of brandubh, beginning from the board
        position in game_state, until a winner is decided. At each turn,
        moves are selected at random. The method returns the winner of
        the game when it is over.
        """
        game = copy.deepcopy(game_state)

        while game.is_not_over():
            random_move = self.bot.select_move(game)
            game.take_turn(random_move)

        return game.winner
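
uct_score() is imported from elsewhere and is not shown in this snippet. A
minimal sketch consistent with the call in select_child() above, using the
standard UCT formula (win rate plus a temperature-weighted exploration
bonus), would be:

import math

def uct_score(parent_rollouts, child_rollouts, win_pct, temperature):
    exploration = math.sqrt(math.log(parent_rollouts) / child_rollouts)
    return win_pct + temperature * exploration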
Example #10
"""
Running this file will start a 4-(human)-player game of Blokus.
Each turn, a player must enter a piece ID (0-20),
a piece orientation (0-7), representing 90 degree CCW rotations from 0-3, and a flip followed by rotations from 4-7,
a column and row corresponding to the top left of the piece's bounding box (even if that is a hole in the piece).
Note that the origin of the board is also the top left.

Alternately, a player can enter -1 as a piece ID to retire with whatever score they have.
"""

import numpy as np
from constants import *
from board import *
from player import Player
from random_bot import RandomBot
from mcts import MCTSBot
from stat_calculator import *
from game import *
from piece import *
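
# As a sketch of the orientation scheme described in the module docstring
# (orient_piece is illustrative and assumes each piece is stored as a 2-D
# numpy array; it is not part of the game code):
def orient_piece(piece, orientation):
    # Orientations 0-3 are counter-clockwise quarter turns; 4-7 flip the
    # piece left-right first, then apply the same turns.
    if orientation >= 4:
        piece = np.fliplr(piece)
    return np.rot90(piece, k=orientation % 4)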

if __name__ == '__main__':
    pieces = read_pieces(PIECES_FILE)
    #players = [RandomBot(i) for i in range(NUM_PLAYERS)]
    players = [RandomBot(i) for i in range(3)
               ] + [MCTSBot(3, pieces, 5.0, 1.414, 'ucb1', None)]
    if TRACK_STATS:
        calc_stats(pieces, players)
    else:
        play_game(pieces, players)