import random

import numpy as np
import matplotlib.pyplot as plt

# Project-local names used below (Board, Moves, Agent, CLI,
# initial_board_generator, update_draw_counter, MiniMaxAlphaBetaSystem)
# are assumed to be imported from the repo's own modules.


def random_game_generator() -> tuple:
    """Generate a random valid board obtained by playing some moves from the initial board.

    At most nine moves are played (``num_of_turns`` is drawn from
    [0, 10] and the loop stops at ``num_of_turns - 1``).

    Returns
    -------
    tuple
        a random game.
        first element is a random valid board.
        second element is turn's number.
        third element is the draw counter.
    """
    current_board = initial_board_generator()
    num_of_turns = random.randint(0, 10)
    draw_counter = 0
    colour = {1: 'white', 0: 'black'}
    for turn in range(1, num_of_turns):
        # number of disks before the move:
        size_before = current_board.get_number_of_disks(None)
        boards = []
        Moves.get_all_next_boards(current_board, colour[turn % 2], boards)
        # get the next board by picking a random valid move.
        current_board = random.choice(boards)
        # number of disks after the move:
        size_after = current_board.get_number_of_disks(None)
        draw_counter = update_draw_counter(draw_counter, size_before,
                                           size_after)
    return current_board, num_of_turns, draw_counter
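# Usage sketch (illustrative): `my_agent` is a placeholder for any Agent
# instance; `train` is defined later in this file.
#
#     game = random_game_generator()
#     costs, results = train(my_agent, num_of_games=50, initial_game=game)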
def choose_board(self, board: Board, turn: int, draw_counter: int) -> Board:
    """Choose the best board by applying the best move to the given board.

    Parameters
    ----------
    board : Board
        the current board.
    turn : int
        turn's number.
    draw_counter : int
        counter of non-attack moves.

    Returns
    -------
    Board
        the new board, which is the result of applying the best valid
        move to the given board.
    """
    # generate all possible boards.
    boards = []
    Moves.get_all_next_boards(board, self._colour, boards)
    # get the fitness of every board.
    values = self.get_fitness(boards, turn + 1, draw_counter)
    # get the index of the best board.
    # because our evaluation function predicts how good a board is
    # for white, when we play as 'black' we must take the minimum,
    # which puts 'white' in the worst possible scenario.
    if self._colour == 'black':
        i = np.argmin(values)
    else:
        i = np.argmax(values)
    return boards[i]
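# Example (sketch): a white agent choosing its first move from the
# standard opening position.
#
#     board = initial_board_generator()
#     next_board = agent.choose_board(board, turn=1, draw_counter=0)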
def get_board(board, loc1, loc2):
    """Return a successor of `board` in which the disk at `loc1` has moved and `loc2` is occupied, or None if no such board exists."""
    boards = []
    Moves.get_next_boards(board, loc1, boards)
    for b in boards:
        if b.get_disk_at(loc2) is not None:
            return b
    return None
def _max(self, board: Board, depth: int, turn: int, draw_counter: int,
         alpha: float, beta: float) -> float:
    self._tot_num += 1  # increase the number of explored nodes.
    # apply the dynamic depth feature if it's set to true.
    # please note that this feature is under test.
    if self._dynamic_depth is True:
        if self._tot_num > MiniMaxAlphaBetaSystem.maximum_nodes_number:
            return np.sum(
                self._pred_system.predict([board], turn, draw_counter))
    # get the status of the current node.
    status = self._terminal_state(board, turn, draw_counter)
    if status is not None:
        # scores are from white's point of view: a loss for black is a
        # win for white and vice versa, so a terminal status on white's
        # turn (odd turn) keeps its sign and one on black's turn flips it.
        sign = 1 if turn % 2 == 1 else -1
        if status == 'win':
            return 100 * sign
        elif status == 'lose':
            return -100 * sign
        else:
            return 0
    # if we reach the maximum depth then stop and return
    # a prediction of the current board state.
    if depth >= self._depth:
        return np.sum(
            self._pred_system.predict([board], turn, draw_counter))
    # generate the next boards by applying the valid moves.
    boards = []
    Moves.get_all_next_boards(board, 'white', boards)
    current_max = -1e7
    size_before = board.get_number_of_disks(None)
    for b in boards:
        size_after = b.get_number_of_disks(None)
        next_draw_counter = update_draw_counter(draw_counter, size_before,
                                                size_after)
        current_max = max(
            current_max,
            self._min(b, depth + 1, turn + 1, next_draw_counter, alpha,
                      beta))
        # beta cutoff: the minimizing parent already has a better option.
        if current_max >= beta:
            return current_max
        alpha = max(alpha, current_max)
    return current_max
def generate_random_training_set() -> tuple:
    """Generate one random self-play game and return white's board positions together with the final status from white's point of view."""
    colour = {1: 'white', 0: 'black'}
    current_board = initial_board_generator()
    boards = []  # list of white board positions through the game.
    # 2-tuple:
    # first element is the status (i.e. 'win', 'lose', or 'draw').
    # second element is the colour of the player with the final status.
    final_status = (None, None)
    start_turn = 1
    draw_counter = 0
    for turn in range(start_turn, 10000):
        # store the boards where it's white's turn.
        if colour[turn % 2] == 'white':
            boards.append(current_board)
        # get the status of the game; None indicates that the game is
        # still going.
        status = current_board.get_status(colour[turn % 2], draw_counter)
        # if the game ends, set the right values in final_status,
        # then exit the game.
        if status is not None:
            final_status = (status, colour[turn % 2])
            break
        # number of disks before the move:
        size_before = current_board.get_number_of_disks(None)
        # get the next board by playing a random valid move.
        next_boards = []
        Moves.get_all_next_boards(current_board, colour[turn % 2],
                                  next_boards)
        current_board = random.choice(next_boards)
        # number of disks after the move:
        size_after = current_board.get_number_of_disks(None)
        draw_counter = update_draw_counter(draw_counter, size_before,
                                           size_after)
    # get the final status for white.
    if final_status[1] == 'black' and final_status[0] != 'draw':
        final_status = ('win' if final_status[0] == 'lose' else 'lose',
                        final_status[1])
    return boards, final_status[0]
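# Usage sketch: build one labelled game of self-play data, where the
# label is the outcome from white's point of view.
#
#     boards, status = generate_random_training_set()
#     # `boards` holds every position where white was to move;
#     # `status` is 'win', 'lose', or 'draw' for white.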
def _min(self, board: Board, depth: int, turn: int, draw_counter: int,
         alpha: float, beta: float) -> float:
    self._tot_num += 1  # increase the number of explored nodes.
    # apply the dynamic depth feature if it's set to true.
    if self._dynamic_depth is True:
        if self._tot_num > MiniMaxAlphaBetaSystem.maximum_nodes_number:
            return np.sum(
                self._pred_system.predict([board], turn, draw_counter))
    # get the status of the current node.
    status = self._terminal_state(board, turn, draw_counter)
    if status is not None:
        # scores are from white's point of view, as in _max above.
        sign = 1 if turn % 2 == 1 else -1
        if status == 'win':
            return 100 * sign
        elif status == 'lose':
            return -100 * sign
        else:
            return 0
    # stop at the maximum depth and return a prediction of the board.
    if depth >= self._depth:
        return np.sum(
            self._pred_system.predict([board], turn, draw_counter))
    # generate the next boards by applying the valid moves.
    boards = []
    Moves.get_all_next_boards(board, 'black', boards)
    current_min = 1e7
    size_before = board.get_number_of_disks(None)
    for b in boards:
        size_after = b.get_number_of_disks(None)
        next_draw_counter = update_draw_counter(draw_counter, size_before,
                                                size_after)
        current_min = min(
            current_min,
            self._max(b, depth + 1, turn + 1, next_draw_counter, alpha,
                      beta))
        # alpha cutoff: the maximizing parent already has a better option.
        if current_min <= alpha:
            return current_min
        beta = min(beta, current_min)
    return current_min
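# Minimal standalone sketch (illustration only, not part of the engine):
# the same alpha-beta cutoffs as in _max/_min above, applied to a toy
# game tree given as nested lists whose leaves are numbers.
def _toy_alpha_beta(node, maximizing, alpha=-1e7, beta=1e7):
    if isinstance(node, (int, float)):  # leaf: return its value.
        return node
    if maximizing:
        best = -1e7
        for child in node:
            best = max(best, _toy_alpha_beta(child, False, alpha, beta))
            if best >= beta:  # beta cutoff, as in _max above.
                return best
            alpha = max(alpha, best)
        return best
    best = 1e7
    for child in node:
        best = min(best, _toy_alpha_beta(child, True, alpha, beta))
        if best <= alpha:  # alpha cutoff, as in _min above.
            return best
        beta = min(beta, best)
    return best


# _toy_alpha_beta([[3, 5], [2, 9]], maximizing=True) evaluates to 3:
# after seeing the leaf 2 in the second branch, min(2, ...) <= alpha == 3,
# so the leaf 9 is pruned without being visited.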
def _f6_number_of_pieces_threatened_by_black(self, board: Board) -> float:
    """Feature: the number of white pieces that black threatens to capture on its next move."""
    threatened = dict()
    boards = []
    # `Moves.get_all_next_boards` fills `threatened` with the pieces
    # that would be captured by any of black's moves.
    Moves.get_all_next_boards(board, 'black', boards, threatened)
    return len(threatened)
def train(agent: Agent, num_of_games: int, initial_game: tuple = None,
          explore_probability: int = 10, output: bool = False,
          results_output: bool = True, plots: bool = True) -> tuple:
    """Train the agent by making it play games against itself.

    Parameters
    ----------
    agent : Agent
        The agent to train.
    num_of_games : int
        number of training games.
    initial_game : tuple, optional
        a starting game.
        first element is an initial board.
        second element is turn's number.
        third element is the draw counter.
        the default is None.
    explore_probability : int, optional
        controls how often the agent explores a random move instead of
        playing the optimal move it has learnt so far: the probability
        of exploring is 1/explore_probability. if it's None then no
        exploration will happen. the default is 10 (i.e. a probability
        of 0.1).
    output : bool, optional
        indicates if you want to print the game or not.
        the default is False.
    results_output : bool, optional
        indicates if you want to print the game's results or not.
        the default is True.
    plots : bool, optional
        indicates if you want to plot the prediction errors or not.
        the default is True.

    Returns
    -------
    tuple
        first element, list: i-th element is the prediction error on
        the boards of the i-th game.
        second element, list: i-th element is the result of the i-th
        game for white (1 win, -1 lose, 0 draw).
    """
    costs = []
    cli = CLI()  # object to use the console interface if output is True.
    # dictionary to convert a status into a number.
    d = {'win': 1, 'lose': -1, 'draw': 0}
    # dictionary for knowing the colour of the current player:
    # 1 is the first player, 0 is the second player.
    colour = {1: 'white', 0: 'black'}
    # used to restore the agent's colour after training ends;
    # that's because we update the evaluation based on white's position.
    previous_colour = agent.get_colour()
    results = []  # store the results of the games.
    # the reciprocal of the probability that the agent plays a random move.
    prob = explore_probability
    data_set = {'board': [], 'f_status': []}
    for i in range(num_of_games):
        boards = []  # list of white board positions through the game.
        if initial_game is None:
            # generate the initial board.
            current_board = initial_board_generator()
            start_turn = 1
            draw_counter = 0
        else:
            current_board, start_turn, draw_counter = initial_game
        # 2-tuple:
        # first element is the status (i.e. 'win', 'lose', or 'draw').
        # second element is the colour of the player with the final status.
        final_status = (None, None)
        for turn in range(start_turn, 10000):
            # alternate the roles of the same agent by setting its colour
            # to the colour of the player who should move this turn.
            agent.set_colour(colour[turn % 2])
            # store the boards when it's white's turn.
            if colour[turn % 2] == 'white':
                boards.append(current_board)
            # get the status of the game; None indicates that the game
            # is still going.
            status = current_board.get_status(colour[turn % 2],
                                              draw_counter)
            # print the turn number and the board if we need them.
            if output is True:
                print(f'turn: {turn}')
                cli.show(current_board)
            # if the game ends, set the right values in final_status,
            # then exit the game.
            if status is not None:
                final_status = (status, colour[turn % 2])
                break
            # number of disks before the move:
            size_before = current_board.get_number_of_disks(None)
            # get the next board, either by exploring a random move or
            # by letting the agent choose the move.
            r = random.randint(1, prob) if prob is not None else 0
            if r == 1:
                # explore: play a random valid move.
                next_boards = []
                Moves.get_all_next_boards(current_board, colour[turn % 2],
                                          next_boards)
                current_board = random.choice(next_boards)
            else:
                # exploit: play the move the agent thinks is best.
                current_board = agent.choose_board(current_board, turn,
                                                   draw_counter)
            # number of disks after the move:
            size_after = current_board.get_number_of_disks(None)
            draw_counter = update_draw_counter(draw_counter, size_before,
                                               size_after)
        # get the final status for white.
        if final_status[1] == 'black' and final_status[0] != 'draw':
            final_status = ('win' if final_status[0] == 'lose' else 'lose',
                            final_status[1])
        # add the cost before updating.
        costs.append(agent.get_system().compute_error(boards,
                                                      final_status[0]))
        # learning is deferred to the balanced pass after all games;
        # the immediate update is kept here for reference:
        # agent.learn(boards, final_status[0])
        # add the result of the game for the white player.
        results.append(d[final_status[0]])
        data_set['board'].append(boards)
        data_set['f_status'].append(final_status[0])
        if (i + 1) % 100 == 0:
            print(f'{i + 1} games finished until now!')
    # reset the colour of the agent.
    agent.set_colour(previous_colour)
    # let the agent learn from the previous games, balancing the set by
    # using an equal number of wins and losses (and every draw).
    m = min(results.count(-1), results.count(1))
    lose_m = m
    win_m = m
    for i in range(num_of_games):
        bs = data_set['board'][i]
        fs = data_set['f_status'][i]
        if fs == 'win' and win_m > 0:
            agent.learn(bs, fs)
            win_m -= 1
        elif fs == 'lose' and lose_m > 0:
            agent.learn(bs, fs)
            lose_m -= 1
        elif fs == 'draw':
            agent.learn(bs, fs)
    if results_output is True:
        wins = results.count(1)
        loses = results.count(-1)
        draws = results.count(0)
        print(f'training results of agent {agent.get_name()}')
        print(f'wins: {wins}')
        print(f'loses: {loses}')
        print(f'draws: {draws}')
        print('##################################')
        print(f'cost: {costs[-1]}')
    if plots is True:
        # plot the cost of every second game.
        plt.plot(np.arange(0, num_of_games, 2), costs[::2], 'k')
        plt.show()
    return costs, results
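# Usage sketch (illustrative): `my_agent` stands for any Agent instance
# built elsewhere in the repo.
#
#     costs, results = train(my_agent, num_of_games=200)
#     print(f'final cost: {costs[-1]}, wins: {results.count(1)}')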