def __init__(self, model=None):
    """Create the encoder, empty evaluation histories, a random bot and the model.

    Args:
        model: Optional ready-made model to use. When it is ``None`` a new
            model is obtained from ``self.init_model()``.
    """
    self.encoder = FourPlaneEncoder()
    # Both histories start empty.
    self.evaluation_history_old = []
    self.evaluation_history_ran = []
    self.rand_bot = RandomBot()
    # Test against None explicitly: relying on truthiness would discard a
    # supplied model object that happens to evaluate as falsy.
    self.model = model if model is not None else self.init_model()
def gain_experience_against_rand_bot(bot, encoder, num_episodes):
    """Collect game records from ``bot`` playing against a ``RandomBot``.

    ``simulate_game`` is called once per episode. On even episodes ``bot``
    is passed as the first player argument, on odd episodes as the second.
    Every turn whose entry in ``players`` equals 1 is removed from the
    record before it is stored.

    Args:
        bot: The bot whose games are recorded.
        encoder: Passed through to ``simulate_game`` unchanged.
        num_episodes: Number of games to simulate.

    Returns:
        A list with one dict per episode, holding the keys ``'boards'``,
        ``'moves'``, ``'players'`` and ``'winner'``.
    """
    experience = []
    rbot = RandomBot()
    for episode_idx in range(num_episodes):
        if episode_idx % 2 == 0:
            boards, moves, players, winner = simulate_game(bot, rbot, encoder)
        else:
            boards, moves, players, winner = simulate_game(rbot, bot, encoder)

        # Select the turns to keep by index instead of deleting from the
        # lists while iterating over them: in-place deletion shifts the
        # remaining items and skips one whenever player 1 has two
        # consecutive entries.
        # NOTE(review): player 1's turns are dropped no matter which seat
        # `bot` occupied, exactly as before - confirm this is intended.
        kept = [idx for idx, player in enumerate(players) if player != 1]

        experience.append({
            'boards': [boards[idx] for idx in kept],
            'moves': [moves[idx] for idx in kept],
            'players': [players[idx] for idx in kept],
            'winner': winner,
        })
    return experience
def train(num_games: int = 100000, saved_bot_file=None):
    """Play an ``RLBot`` against a ``RandomBot`` and report its win rate.

    After every game the RL bot is told the final outcome through
    ``update_end_position`` and the running win fraction is printed.

    Args:
        num_games: Number of games to play.
        saved_bot_file: Optional argument for ``load_bot_data``. The loaded
            data must provide ``"marker"`` and ``"state_values"`` entries,
            which are used to build the bot. When ``None`` a fresh
            ``RLBot('o')`` is created.

    Returns:
        None.
    """
    if saved_bot_file is None:
        rl_bot = RLBot('o')
    else:
        rl_bot_data = load_bot_data(saved_bot_file)
        rl_bot = RLBot(rl_bot_data["marker"], rl_bot_data["state_values"])

    # The random bot takes the marker the RL bot does not use. The outcome
    # counted as an RL-bot win has to follow the RL bot's own marker:
    # counting O_WINS unconditionally would credit the opponent's wins to a
    # loaded bot that plays 'x'.
    if rl_bot.marker == 'o':
        random_marker = 'x'
        rl_bot_win = TicTacToe.O_WINS
    else:
        random_marker = 'o'
        rl_bot_win = TicTacToe.X_WINS

    num_games_played = 0
    num_games_rl_bot_won = 0
    while num_games_played < num_games:
        game = TicTacToe()
        # Randomly decide whether the RL bot moves on odd or even turns.
        rl_bot_turn = randint(0, 1)
        random_bot = RandomBot(random_marker)

        last_outcome = -1
        turn = 1
        while (last_outcome != TicTacToe.DRAW
               and last_outcome != TicTacToe.X_WINS
               and last_outcome != TicTacToe.O_WINS):
            if turn % 2 == rl_bot_turn:
                last_outcome = rl_bot.play_move(game)
            else:
                last_outcome = random_bot.play_move(game)
            turn += 1

        rl_bot.update_end_position(last_outcome)
        num_games_played += 1
        if last_outcome == rl_bot_win:
            num_games_rl_bot_won += 1
        print("RL Bot Winning Percentage", num_games_rl_bot_won / num_games_played)
def evaluate_against_rand_bot(model, num_games):
    """Score a ``DeepLearningBot`` built from ``model`` against a ``RandomBot``.

    The network bot starts as player 1 and switches side after every game.
    Each game adds ``side * game.winner`` to the total, where ``side`` is
    the player value the network bot held in that game.

    Args:
        model: Model handed to ``DeepLearningBot``.
        num_games: Number of games to play.

    Returns:
        The accumulated score divided by ``num_games``.
    """
    random_player = RandomBot()
    network_player = DeepLearningBot(model)
    network_side = 1
    total = 0
    for _ in range(num_games):
        state = GameState.new_game()
        while state.is_not_over():
            # Whoever owns the current turn chooses the action.
            if state.player == network_side:
                mover = network_player
            else:
                mover = random_player
            state.take_turn_with_no_checks(mover.select_move(state))
        total += network_side * state.winner
        network_side *= -1
    return total / num_games
def play_game():
    """Play one tic-tac-toe game between two ``RandomBot`` instances.

    The 'o' bot moves on odd turns and the 'x' bot on even turns, until a
    move reports a draw or a win for either side.
    """
    board = TicTacToe()
    o_bot = RandomBot('o')
    x_bot = RandomBot('x')

    outcome = -1
    move_number = 1
    while not (outcome == TicTacToe.DRAW
               or outcome == TicTacToe.X_WINS
               or outcome == TicTacToe.O_WINS):
        mover = o_bot if move_number % 2 == 1 else x_bot
        outcome = mover.play_move(board)
        move_number += 1
        # NOTE(review): the collapsed source does not show whether the board
        # was printed after every move or only once after the game - confirm.
        board.print_board()
class ActorCriticBot():
    """Bot that selects moves with a two-headed (policy and value) network.

    The network takes a (7, 7, 4) encoded board and produces a softmax over
    96 move indices plus a single tanh value. The bot can evaluate itself
    against a random bot or a previously saved copy of itself, and can
    save/load its model and evaluation histories.
    """

    def __init__(self, model=None):
        """Create the encoder, empty histories, a random bot and the model.

        Args:
            model: Optional ready-made model. When it is ``None`` a new one
                is built with ``init_model``.
        """
        self.encoder = FourPlaneEncoder()
        self.evaluation_history_old = []
        self.evaluation_history_ran = []
        self.rand_bot = RandomBot()
        # Test against None explicitly so that a supplied model object is
        # never discarded because it happens to evaluate as falsy.
        self.model = model if model is not None else self.init_model()

    def init_model(self):
        """Build and compile the policy/value network.

        Returns:
            The compiled model with outputs ``[policy_output, value_output]``.
        """
        board_input = Input(shape=(7, 7, 4), name='board_input')

        # The board planes are flattened and fed straight into dense layers
        # (no convolutional layers are used).
        flat = Flatten()(board_input)
        hidden_board1 = Dense(512, activation='sigmoid')(flat)
        hidden_board2 = Dense(512, activation='sigmoid')(hidden_board1)
        processed_board = Dense(512, activation='sigmoid')(hidden_board2)

        # Policy head: one probability per move index.
        policy_hidden = Dense(512, activation='sigmoid')(processed_board)
        policy_output = Dense(96, activation='softmax')(policy_hidden)

        # Value head: a single number in [-1, 1].
        value_hidden = Dense(512, activation='sigmoid')(processed_board)
        value_output = Dense(1, activation='tanh')(value_hidden)

        model = Model(inputs=board_input,
                      outputs=[policy_output, value_output])

        # NOTE(review): `lr` is the legacy keyword; newer Keras releases
        # spell it `learning_rate` - confirm against the installed version.
        model.compile(optimizer=SGD(lr=0.001),
                      loss=['categorical_crossentropy', 'mse'],
                      loss_weights=[1.0, 1.0])
        return model

    def select_move(self, game_state):
        """Sample a legal move from the policy output.

        Args:
            game_state: Position to choose a move for.

        Returns:
            ``(Act.play(move), value)`` for the first legal move in a random
            ranking drawn from the policy, or ``(Act.pass_turn(), None)``
            when none of the 96 move indices decodes to a legal move.
        """
        move_probs, move_val, input_tensor = self.predict(game_state)

        # Cubing sharpens the distribution towards the likeliest moves.
        move_probs = move_probs**3
        move_probs /= np.sum(move_probs)

        # Keep every probability strictly positive: drawing all 96 indices
        # without replacement needs at least 96 non-zero entries in `p`.
        eps = 1e-6
        move_probs = np.clip(move_probs, eps, 1 - eps)
        move_probs /= np.sum(move_probs)

        # A random permutation of all move indices, weighted by the policy.
        candidates = np.arange(96)
        ranked_moves = np.random.choice(candidates, 96,
                                        replace=False, p=move_probs)

        # Sum of the first two input planes, reshaped to the 7x7 board.
        pieces = np.sum(input_tensor[:, :, :, :2], -1).reshape(7, 7)
        for move_idx in ranked_moves:
            move = self.encoder.decode_move_index(pieces, move_idx)
            if not game_state.is_move_illegal(move):
                return Act.play(move), move_val
        return Act.pass_turn(), None

    def predict(self, game_state):
        """Run the model on ``game_state``.

        Returns:
            ``(move_probabilities, value, input_tensor)`` where the first two
            are taken from the single-sample batch and ``input_tensor`` is
            the encoded position with shape (1, 7, 7, 4).
        """
        input_tensor = self.encoder.encode(game_state).reshape(1, 7, 7, 4)
        probs, value = self.model.predict(input_tensor)
        return probs[0], value[0][0], input_tensor

    def _play_evaluation_games(self, num_games, opponent_action,
                               progress_message):
        """Play ``num_games`` games against an opponent and summarise them.

        Shared by both public evaluation methods, which differ only in the
        opponent and the progress text.

        Args:
            num_games: Number of games to play.
            opponent_action: Callable taking the game and returning the
                opponent's action.
            progress_message: Format string printed before each game; it
                receives the game index.

        Returns:
            ``[score / num_games, 2 * white_wins / num_games,
            2 * black_wins / num_games, num_games]``.
        """
        act_crit_player = 1
        score = 0
        num_games_won_as_black = 0
        num_games_won_as_white = 0
        max_num_of_turns = 1000

        for i in range(num_games):
            print(progress_message.format(i), end='')
            game = GameState.new_game()
            turns_taken = 0
            while game.is_not_over() and turns_taken < max_num_of_turns:
                if game.player == act_crit_player:
                    action, _ = self.select_move(game)
                else:
                    action = opponent_action(game)
                game.take_turn_with_no_checks(action)
                turns_taken += 1

            if turns_taken < max_num_of_turns:
                score += act_crit_player * game.winner
                if act_crit_player == game.winner:
                    # Player value 1 is counted as white, otherwise black.
                    if act_crit_player == 1:
                        num_games_won_as_white += 1
                    else:
                        num_games_won_as_black += 1
            else:
                # Reaching the turn limit counts against this bot.
                score -= 1
            # Swap sides for the next game.
            act_crit_player *= -1

        # The bot alternates sides, so the per-colour win counts are doubled
        # before dividing by the total number of games.
        return [
            score / num_games,
            2 * num_games_won_as_white / num_games,
            2 * num_games_won_as_black / num_games,
            num_games,
        ]

    def evaluate_against_rand_bot(self, num_games):
        """Evaluate against ``self.rand_bot``; append to ``evaluation_history_ran``."""
        self.evaluation_history_ran.append(
            self._play_evaluation_games(
                num_games,
                self.rand_bot.select_move,
                '\rEvaluating against rand bot: game {0}'))

    def evaluate_against_old_bot(self, num_games):
        """Evaluate against the saved old model; append to ``evaluation_history_old``.

        The opponent is loaded from 'old_actor_critic_model.h5'.
        """
        model = load_model('old_actor_critic_model.h5')
        old_bot = ActorCriticBot(model)
        self.evaluation_history_old.append(
            self._play_evaluation_games(
                num_games,
                # select_move returns (action, value); only the action is played.
                lambda game: old_bot.select_move(game)[0],
                '\rEvaluating against old bot: game {0}'))

    def save_bot(self):
        """Write the model and both evaluation histories to disk."""
        self.model.save('actor_critic_model.h5')
        np.save('eval_history_old.npy', self.evaluation_history_old)
        np.save('eval_history_ran.npy', self.evaluation_history_ran)

    def load_bot(self):
        """Read back the model and both evaluation histories from disk."""
        self.model = load_model('actor_critic_model.h5')
        self.evaluation_history_old = list(np.load('eval_history_old.npy'))
        self.evaluation_history_ran = list(np.load('eval_history_ran.npy'))

    def save_as_old_bot(self):
        """Save the current model as the 'old' model used for self-evaluation."""
        self.model.save('old_actor_critic_model.h5')
str(max_nodes) + ' nodes') possible_plays = None play_values = None else: possible_plays = [child.last_play for child in self.root.children] play_values = [child.score for child in self.root.children] return possible_plays, play_values if __name__ == '__main__': pass from piece import read_pieces from random_bot import RandomBot from game import Game pieces = read_pieces(PIECES_FILE) players = [RandomBot(i) for i in range(NUM_PLAYERS)] test_board = [ [255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255], [0, 4, 0, 4, 4, 0, 0, 0, 4, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0], [0, 4, 4, 0, 4, 0, 0, 0, 4, 0, 4, 4, 0, 0, 8, 0, 0, 0, 8, 8, 8, 0], [0, 0, 0, 0, 4, 0, 0, 4, 4, 4, 8, 8, 0, 0, 8, 8, 0, 0, 8, 0, 0, 0], [0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 8, 8, 0, 8, 0, 8, 8, 0, 8, 0, 0], [0, 0, 4, 0, 4, 4, 0, 0, 8, 0, 0, 8, 0, 8, 0, 0, 8, 8, 0, 0, 0, 0], [0, 4, 4, 4, 0, 4, 0, 0, 8, 8, 8, 0, 8, 8, 0, 0, 0, 8, 0, 8, 0, 0], [0, 0, 0, 0, 4, 0, 0, 4, 8, 0, 0, 8, 8, 4, 0, 0, 8, 0, 8, 8, 0, 0], [0, 0, 1, 0, 4, 4, 4, 0, 0, 8, 4, 0, 4, 4, 4, 8, 8, 0, 8, 2, 2, 0], [0, 1, 1, 0, 0, 0, 4, 0, 8, 8, 4, 4, 2, 4, 0, 8, 0, 2, 2, 8, 8, 0], [0, 1, 1, 4, 4, 4, 0, 4, 0, 0, 0, 4, 2, 2, 0, 8, 0, 2, 0, 0, 8, 0], [0, 0, 0, 1, 1, 1, 0, 4, 4, 4, 4, 0, 1, 2, 2, 0, 0, 2, 2, 0, 8, 0], [0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 8, 0, 2, 8, 0], [0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 2, 2, 8, 8, 8, 0, 0],
def __init__(self, num_rounds, temp):
    """Store the search settings and create the bot used for random play.

    Args:
        num_rounds: Stored as ``self.num_rounds``.
        temp: Stored as ``self.temperature``.
    """
    self.num_rounds, self.temperature = num_rounds, temp
    self.bot = RandomBot()
class MCTSBot:
    """
    This bot plays brandubh using a Monte Carlo tree search to select its
    move. An instance of this class is initialised with two parameters:
        1) num_rounds  - the number of nodes to be added to the tree
        2) temperature - affects the balance between exploration and
                         exploitation when picking child nodes by uct score

    When an instance is created, a RandomBot object is also created as an
    instance variable. It is used for simulating random games.

    Monte Carlo tree search:
        The algorithm builds a tree. Each node represents a possible move
        following the move represented by its parent node, and the root is
        the board position the bot is choosing a move for.

        Starting from the root, child nodes are added until no more can be
        added to a node (i.e. all legal moves from that position are in
        the tree). Once a node is full, one of its children is picked
        using the uct score and the search continues from there.

        Each time a child node is added, a random game is played from the
        board position of the new node until a winner is decided. The
        winner is recorded in the node and in all of its ancestors, so
        every node keeps a record of how the random games that started
        below it ended. The fraction of wins for a player ranks how good a
        move is for that player.

        This continues until 'num_rounds' nodes have been added. The child
        of the root with the best ranking is selected as the next move.
    """

    def __init__(self, num_rounds, temp):
        self.num_rounds, self.temperature = num_rounds, temp
        self.bot = RandomBot()

    def select_move(self, game_state):
        """
        Use the Monte Carlo tree search to choose the next move for the
        board position in game_state.
        """
        root = MCTSNode(game_state)

        for _ in range(self.num_rounds):
            # Every rollout starts its descent at the root.
            node = root

            # Walk down with the uct score for as long as the current node
            # is full and does not represent a finished game.
            while not (node.can_add_child() or node.is_terminal()):
                if node.children == []:
                    break
                node = self.select_child(node)

            # Grow the tree by one random child where possible.
            if node.can_add_child():
                node = node.add_random_child()

            # Play a random game from this position and record the winner
            # in the node and every ancestor up to the root.
            winner = self.simulate_random_game(node.game_state)
            while node is not None:
                node.record_win(winner)
                node = node.parent

        # Pick the root child with the highest winning fraction for the
        # player to move.
        best_move, best_frac = None, -1
        for child in root.children:
            frac = child.winning_frac(game_state.player)
            if frac > best_frac:
                best_frac, best_move = frac, child.move

        if best_move is None:
            return Act.pass_turn()
        return Act.play(best_move)

    def select_child(self, node):
        """
        Return the child of node with the best uct score.
        """
        total_rollouts = sum(child.num_rollouts for child in node.children)

        best_child, best_score = None, -1
        for candidate in node.children:
            candidate_score = uct_score(
                total_rollouts,
                candidate.num_rollouts,
                candidate.winning_frac(node.game_state.player),
                self.temperature)
            if candidate_score > best_score:
                best_child, best_score = candidate, candidate_score
        return best_child

    def simulate_random_game(self, game_state):
        """
        Play a game of brandubh from the board position in game_state,
        choosing every move at random, and return the winner. The game is
        played on a deep copy of game_state.
        """
        playout = copy.deepcopy(game_state)
        while playout.is_not_over():
            playout.take_turn(self.bot.select_move(playout))
        return playout.winner
""" Running this file will start a 4-(human)-player game of Blokus. Each turn, a player must enter a piece ID (0-20), a piece orientation (0-7), representing 90 degree CCW rotations from 0-3, and a flip followed by rotations from 4-7, a column and row corresponding to the top left of the piece's bounding box (even if that is a hole in the piece). Note that the origin of the board is also the top left. Alternately, a player can enter -1 as a piece ID to retire with whatever score they have. """ import numpy as np from constants import * from board import * from player import Player from random_bot import RandomBot from mcts import MCTSBot from stat_calculator import * from game import * from piece import * if __name__ == '__main__': pieces = read_pieces(PIECES_FILE) #players = [RandomBot(i) for i in range(NUM_PLAYERS)] players = [RandomBot(i) for i in range(3) ] + [MCTSBot(3, pieces, 5.0, 1.414, 'ucb1', None)] if TRACK_STATS: calc_stats(pieces, players) else: play_game(pieces, players)