def __init__(self, board, colour):
    # transposition table for caching evaluated positions
    self.tt = TranspositionTable()

    # work on a private copy of the board so the search never mutates the real game state
    self.board = deepcopy(board)

    # alpha-beta bounds -- kept as attributes instead of being passed into every call
    self.alpha = -inf
    self.beta = inf

    # colours of the maximising player and the opponent
    self.player = colour
    self.opponent = Board.get_opp_piece_type(self.player)

    # default depth
    self.depth = inf

    # default move ordering with iterative deepening
    self.actions_evaluated = []
    self.actions_leftover = []

    # data structures for machine learning
    self.eval_depth = 0
    self.minimax_val = 0
    self.policy_vector = []

    # dictionary storing the available moves of the board
    self.available_actions = {
        constant.WHITE_PIECE: {},
        constant.BLACK_PIECE: {}
    }

    # generate the actions for the start of the game
    # self.generate_actions()
    self.undo_effected = []

    # timing attributes
    self.time_alloc = 0
    self.time_rem = 0
    self.time_start = 0
    self.time_end = 0
    self.total_time = 0

    # load the evaluation function based on the colour of the player
    if self.player == constant.WHITE_PIECE:
        self.evaluation = Evaluation("./XML", "/white_weights")
    else:
        self.evaluation = Evaluation("./XML", "/black_weights")
def sort_actions(self, actions):
    # order actions best-first according to a lightweight policy evaluation
    action_heap = []
    result = []

    for action in actions:
        # apply the action, score the resulting position, then undo it
        self.board.update_board(action, self.player)
        val = Evaluation.basic_policy(self.board, self.player)
        self.undo_move()
        heapq.heappush(action_heap, (val, action))

    # convert to a max-heap so the highest-valued actions are popped first
    heapq._heapify_max(action_heap)
    while len(action_heap) > 0:
        result.append(heapq._heappop_max(action_heap)[1])

    return result
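# The heapq._heapify_max / heapq._heappop_max calls above are private CPython helpers and
# are not guaranteed to remain available. A minimal, self-contained sketch of the same
# "highest score first" ordering using only the public heapq API; it works on plain
# (score, action) pairs rather than this project's Board / Evaluation objects, and the
# function name is illustrative, not part of the original module.

import heapq  # redundant if the module-level import used by sort_actions is present


def _order_by_score_demo(scored_actions):
    """Return the actions ordered best-first; scored_actions is an iterable of (score, action)."""
    heap = []
    for i, (score, action) in enumerate(scored_actions):
        # negate the score so the smallest heap element corresponds to the best action;
        # the running index i breaks ties so actions themselves never need to be comparable
        heapq.heappush(heap, (-score, i, action))
    return [heapq.heappop(heap)[2] for _ in range(len(heap))]


# e.g. _order_by_score_demo([(1, "a"), (5, "b"), (3, "c")]) returns ["b", "c", "a"]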
class Negascout(object): def __init__(self, board, colour): # we want to create a node self.tt = TranspositionTable() # only use this board to complete the search # save memory self.board = deepcopy(board) # for alpha beta search -- instead of passing it into the function calls we can use this self.alpha = -inf self.beta = inf # defines the colours of min and max self.player = colour self.opponent = Board.get_opp_piece_type(self.player) # default depth self.depth = inf # default move ordering with iterative deepening self.actions_evaluated = [] self.actions_leftover = [] # data structures for machine learning self.eval_depth = 0 self.minimax_val = 0 self.policy_vector = [] # dictionary storing the available moves of the board self.available_actions = { constant.WHITE_PIECE: {}, constant.BLACK_PIECE: {} } # generate the actions for the start of the game # self.generate_actions() self.undo_effected = [] self.time_alloc = 0 self.time_rem = 0 self.time_start = 0 self.time_end = 0 self.total_time = 0 # load the evaluation function based on the colour of the player if self.player == constant.WHITE_PIECE: self.evaluation = Evaluation("./XML", "/white_weights") else: self.evaluation = Evaluation("./XML", "/black_weights") ''' * Alpha Beta - Minimax Driver Function ''' def itr_negascout(self): colour = self.player # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0: if self.board.phase == constant.PLACEMENT_PHASE: # clear the transposition table every time we want to evaluate a move in placement phase # this is to limit the size of growth self.tt.clear() # set the max depth iterations based on the phase that we are in MAX_ITER = 5 else: MAX_ITER = 11 # update the root number of pieces every time we do a search on a new node self.board.root_num_black = len(self.board.black_pieces) self.board.root_num_white = len(self.board.white_pieces) # default policy available_actions = self.board.update_actions(colour) action_set = set(available_actions) if len(available_actions) == 0: return None if self.board.phase == constant.PLACEMENT_PHASE: self.time_alloc = 1500 else: self.time_alloc = 1200 # if we have reached 100 moves in the game and the game if self.total_time > 90000 or self.board.move_counter > 120: self.time_alloc = 500 # if we are near the final shrinking phase, then we can decrease the time it has to # evaluate if self.board.move_counter > 150: self.time_alloc = 150 best_depth = 1 val, move = 0, None best_move = None self.time_rem = self.time_alloc # iterative deepening begins here for depth in range(1, MAX_ITER): print(self.tt.size) print(depth) try: self.time_start = self.curr_millisecond_time() val, move = self.negascout(depth, -inf, inf, self.player) # move = self.negascout(depth,self.player) self.time_end = self.curr_millisecond_time() self.time_rem = self.time_alloc - (self.time_end - self.time_start) print(move) best_depth += 1 if move is not None and move in action_set: best_move = move except TimeOut: print("TIMEOUT") break # add the time allocated to the total time self.total_time += self.time_alloc self.eval_depth = best_depth return best_move def set_player_colour(self, colour): self.player = colour self.opponent = Board.get_opp_piece_type(colour) @staticmethod def curr_millisecond_time(): return int(time() * 1000) def negascout(self, depth, alpha, beta, colour): # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - 
self.time_start > self.time_rem: raise TimeOut opponent = Board.get_opp_piece_type(colour) original_alpha = alpha dic = {self.player: 1, self.opponent: -1} move_to_try = None # check if the current board state is in the transposition table board_str = self.board.board_state.decode("utf-8") key = self.tt.contains(board_str, colour, phase=self.board.phase) if key is not None: board_str = key[0] entry = self.tt.get_entry(board_str, colour) tt_value = entry[0] tt_type = entry[1] tt_best_move = entry[2] tt_depth = entry[3] # if we have found an entry in the transposition table, then the move # we should try first is this best move move_to_try = tt_best_move #print(move_to_try) #print("FOUND ENTRY IN TT") if tt_depth >= depth: if tt_type == constant.TT_EXACT: #print("FOUND PV") return tt_value, tt_best_move elif tt_type == constant.TT_LOWER: if tt_value > alpha: #print("FOUND FAIL SOFT") alpha = tt_value elif tt_type == constant.TT_UPPER: if tt_value < beta: #print("FOUND FAIL HARD") beta = tt_value if alpha >= beta: return tt_value, tt_best_move actions = self.board.update_actions(colour) # actions = actions_1 actions = self.board.sort_actions(actions, colour) #actions = actions_1 # terminal test -- default case if self.cutoff_test(depth): val = self.evaluate_state(self.board, self.player, actions) * dic[colour] return val, None # do the minimax search best_val = -inf best_action = None if move_to_try is not None and move_to_try in actions: #print("MOVE ORDERING") # put the move to try at the first position -- therefore it will be searched first actions = [move_to_try] + actions i = 0 if len(actions) <= 12: favourable = actions elif 12 < len(actions) < 20: favourable = actions[:12] else: favourable = actions[:len(actions) // 2] # print(len(actions)) # start negascout here for i, action in enumerate(favourable): # skip over the best action in the tt table if action == move_to_try and i > 0: continue elim = self.board.update_board(action, colour) # if we are at the first node -- this is the best node we have found so far # therefore we do a full search on this node if i == 0: # do a full search on the best move found so far score, _ = self.negascout(depth - 1, -beta, -alpha, opponent) score = -score else: # assume that the first move is the best move we have found so far, # therefore to see if this is the case we can do a null window search on the # rest of the moves, if the search breaks, then we know that the first move is # the best move and it will return the best move # but if the search "failed high" - i.e. 
the score is between alpha and beta # we need to do a full research of the node to work out the minimax value # do the null window search score, _ = self.negascout(depth - 1, -alpha - 1, -alpha, opponent) score = -score # if it failed high, then we just do a full search to find the actual best move if alpha < score < beta: score, _ = self.negascout(depth - 1, -beta, -score, opponent) score = -score # get the best value and score if best_val < score: best_val = score best_action = action # reset alpha if alpha < score: alpha = score # undo the action applied to the board -- we can now apply another move to the board self.undo_actions(action, colour, elim) # test for alpha beta cutoff if alpha >= beta: break # store the values in the transposition table if best_val <= original_alpha: # then this is an upperbound -FAILHARD tt_type = constant.TT_UPPER elif best_val >= beta: tt_type = constant.TT_LOWER # print("LOWER") else: tt_type = constant.TT_EXACT # print("EXACT") # add the entry to the transposition table self.tt.add_entry(self.board.board_state, colour, best_val, tt_type, best_action, depth) return best_val, best_action def cutoff_test(self, depth): if depth == 0: return True if self.is_terminal(): return True return False ''' * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended -- Need to work out some optimisations of the algorithm though ''' def evaluate_state(self, board, colour, actions): return self.evaluation.evaluate(board, colour, actions) # update the available moves of the search algorithm after it has been instantiated # # def update_available_moves(self, node, available_moves): # node.available_moves = available_moves def update_board(self, board): self.board = deepcopy(board) def is_terminal(self): return self.board.is_terminal() def undo_actions(self, action, colour, elim): return self.board.undo_action(action, colour, elim)
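# Illustrative usage of the Negascout driver above (not part of the original module).
# The Board constructor arguments are assumed, and game_board, searcher and best_action
# are hypothetical names used only for this sketch.
#
#     game_board = Board()                                   # assuming a default constructor
#     searcher = Negascout(game_board, constant.WHITE_PIECE)
#     searcher.update_board(game_board)                      # sync the search copy each turn
#     best_action = searcher.itr_negascout()                 # None means no legal action (forfeit)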
class Negamax(object): def __init__(self, board, colour): # we want to create a node self.tt = TranspositionTable() # only use this board to complete the search # save memory self.board = deepcopy(board) # for alpha beta search -- instead of passing it into the function calls we can use this self.alpha = -inf self.beta = inf # defines the colours of min and max self.player = colour self.opponent = Board.get_opp_piece_type(self.player) # default depth self.depth = inf # default move ordering with iterative deepening self.actions_evaluated = [] self.actions_leftover = [] # data structures for machine learning self.eval_depth = 0 self.minimax_val = 0 self.policy_vector = [] # dictionary storing the available moves of the board self.available_actions = { constant.WHITE_PIECE: {}, constant.BLACK_PIECE: {} } # generate the actions for the start of the game # self.generate_actions() self.undo_effected = [] self.time_alloc = 0 self.time_rem = 0 self.time_start = 0 self.time_end = 0 self.evaluation = Evaluation("./XML", "/eval_weights") ''' * Alpha Beta - Minimax Driver Function ''' def itr_negamax(self): # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0: #if self.board.phase == constant.PLACEMENT_PHASE: self.tt.clear() MAX_ITER = 10 # default policy available_actions = self.board.update_actions(self.board, self.player) # self.actions_leftover = self.board.update_actions(self.board, self.player) if len(available_actions) == 0: return None #else: # lets just set the default to the first move # move = available_actions[0] # time allocated per move in ms ''' self.time_alloc = 0 if self.board.phase == constant.PLACEMENT_PHASE: self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter) else: self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter) ''' self.time_alloc = 5000 # get time start_time = Negamax.curr_millisecond_time() best_depth = 1 val, move = 0, None # iterative deepening begins here for depth in range(1, MAX_ITER): print(self.tt.size) print(depth) try: self.time_rem = self.time_alloc self.time_start = self.curr_millisecond_time() val, move = self.negamax(depth, -inf, inf, self.player) self.time_end = self.curr_millisecond_time() self.time_rem = self.time_alloc - (self.time_end - self.time_start) print(move) best_depth += 1 except TimeOut: print("TIMEOUT") break if Negamax.curr_millisecond_time() - start_time > self.time_alloc: break self.eval_depth = best_depth return move def set_player_colour(self, colour): self.player = colour self.opponent = Board.get_opp_piece_type(colour) @staticmethod def curr_millisecond_time(): return int(time() * 1000) def negamax(self, depth, alpha, beta, colour): # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - self.time_start > self.time_rem: raise TimeOut opponent = Board.get_opp_piece_type(colour) original_alpha = alpha dic = {self.player: 1, self.opponent: -1} ''' move_to_try = None # check if the current board state is in the transposition table board_str = self.board.board_state.decode("utf-8") key = self.tt.contains(board_str,colour,phase=self.board.phase) if key is not None: board_str = key[0] entry = self.tt.get_entry(board_str,colour) tt_value = entry[0] tt_type = entry[1] tt_best_move = entry[2] tt_depth = entry[3] # if we have found an entry in the transposition table, then the move # we should try first is this best move move_to_try = 
tt_best_move #print("FOUND ENTRY IN TT") if tt_depth >= depth: if tt_type == constant.TT_EXACT: #print("FOUND PV") return tt_value, tt_best_move elif tt_type == constant.TT_LOWER: if tt_value > alpha: #print("FOUND FAIL SOFT") alpha = tt_value elif tt_type == constant.TT_UPPER: if tt_value < beta: #print("FOUND FAIL HARD") beta = tt_value if alpha >= beta: return tt_value, None ''' # terminal test -- default case if self.cutoff_test(depth): val = self.evaluate_state(self.board, self.player) #*dic[colour] return val, None # do the minimax search best_val = -inf best_action = None actions = self.board.update_actions(self.board, colour) ''' if move_to_try is not None and move_to_try in actions: #print("MOVE ORDERING") # put the move to try at the first position -- therefore it will be searched first actions = [move_to_try] + actions i = 0 ''' # get the favourable moves of the board actions = self.get_favourable_actions(self.available_actions) # if there are no favourable actions to iterate on - raise if len(actions) < 0: raise ReturnUnfavourableMove for action in actions: # skip over the best action in the tt table ''' if action == move_to_try and i!= 0: continue i+=1 ''' self.board.update_board(action, colour) score, temp = self.negamax(depth - 1, -beta, -alpha, opponent) score = -score if score > best_val: best_val = score best_action = action if score > alpha: alpha = score self.undo_move() if alpha >= beta: break ''' # store the values in the transposition table if best_val <= original_alpha: # then this is an upperbound -FAILHARD tt_type = constant.TT_UPPER elif best_val >= beta: tt_type = constant.TT_LOWER # print("LOWER") else: tt_type = constant.TT_EXACT # print("EXACT") ''' # add the entry to the transposition table # self.tt.add_entry(self.board.board_state,colour,best_val,tt_type,best_action, depth) return best_val, best_action def cutoff_test(self, depth): if depth == 0: return True if self.is_terminal(): return True return False ''' * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended -- Need to work out some optimisations of the algorithm though ''' def evaluate_state(self, board, colour): #return Evaluation.basic_policy(board,colour) return self.evaluation.evaluate(board, self.player) # update the available moves of the search algorithm after it has been instantiated # # def update_available_moves(self, node, available_moves): # node.available_moves = available_moves def update_board(self, board): self.board = deepcopy(board) def is_terminal(self): return self.board.is_terminal() def undo_move(self): return self.board.undo_move()
class Negamax(object): def __init__(self, board, colour, file_name): # we want to create a node self.tt = TranspositionTable() # only use this board to complete the search # save memory self.board = deepcopy(board) # for alpha beta search -- instead of passing it into the function calls we can use this self.alpha = -inf self.beta = inf # defines the colours of min and max self.player = colour self.opponent = Board.get_opp_piece_type(self.player) # default depth self.depth = inf # data structures for machine learning self.eval_depth = 0 self.minimax_val = 0 self.policy_vector = [] # dictionary storing the available moves of the board self.available_actions = { constant.WHITE_PIECE: {}, constant.BLACK_PIECE: {} } # timing attributes self.undo_effected = [] self.time_alloc = 0 self.time_rem = 0 self.time_start = 0 self.time_end = 0 self.total_time = 0 # load the evaluation function based on the colour of the player if self.player == constant.WHITE_PIECE: self.evaluation = Evaluation("./XML", "/white_weights") else: self.evaluation = Evaluation("./XML", "/black_weights") ''' Iterative Deepening Negamax This implements a time-cutoff such that search is terminated once we have reached the allocated time for evaluation. IT RETURNS THE BEST MOVE IT HAS FOUND IN THE TIME ALLOCATED ''' def itr_negamax(self): # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0: if self.board.phase == constant.PLACEMENT_PHASE: # clear the transposition table every time we want to evaluate a move in placement phase # this is to limit the size of growth self.tt.clear() # set the max depth iterations based on the phase that we are in MAX_ITER = 5 else: MAX_ITER = 11 # update the root number of pieces every time we do a search on a new node self.board.root_num_black = len(self.board.black_pieces) self.board.root_num_white = len(self.board.white_pieces) # default policy available_actions = self.board.update_actions(self.player) # if there are no available actions to make, therefore we just return None -- this is a forfeit if len(available_actions) == 0: return None if self.board.phase == constant.PLACEMENT_PHASE: self.time_alloc = 1500 else: self.time_alloc = 1200 # if we have reached 100 moves in the game and the game if self.total_time > 90000 or self.board.move_counter > 120: self.time_alloc = 500 # if we are near the final shrinking phase, then we can decrease the time it has to # evaluate if self.board.move_counter > 150: self.time_alloc = 190 best_depth = 1 val, move = 0, None # set the time remaining for each move evaluation self.time_rem = self.time_alloc # iterative deepening begins here for depth in range(1, MAX_ITER): # get the best move until cut off is reached try: self.time_start = self.curr_millisecond_time() val, move = self.negamax(depth, -inf, inf, self.player) self.time_end = self.curr_millisecond_time() # update the time remaining self.time_rem = self.time_alloc - (self.time_end - self.time_start) best_depth += 1 except TimeOut: break # add the total time to the time allocated self.total_time += self.time_alloc # print(self.total_time) print(best_depth - 1) self.eval_depth = best_depth - 1 return move def set_player_colour(self, colour): self.player = colour self.opponent = Board.get_opp_piece_type(colour) # get the current time in milliseconds @staticmethod def curr_millisecond_time(): return int(time() * 1000) ''' NEGAMAX DRIVER FUNCTION -- THIS IMPLEMENTS THE 
FOLLOWING: - NEGAMAX WITH A TRANSPOSITION TABLE - MOVE ORDERING USING THE BEST MOVE WE HAVE FOUND SO FAR (IF IT EXISTS IN THE TRANSPOSITION TABLE) - MOVE ORDERING OF THE MOVES WE THINK TO BE FAVOURABLE USING A LIGHTWEIGHT EVALUATION FUNCTION - SELECTING ONLY THE TOP FAVOURABLE MOVES TO EVALUATE USING MINIMAX -- THIS IS HEAVY GREEDY PRUNING APPLIED TO NEGAMAX DESIGNED SUCH THAT WE ONLY LOOK AT MOVES THAT WE THINK WILL PRODUCE A GOOD OUTCOME, THUS PRUNING ANY MOVES THAT HAVE A HIGH CHANGE OF HAVING NO EFFECT ON THE GAME-STATE UTILITY. ''' def negamax(self, depth, alpha, beta, colour): # print(self.board.board_state) # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - self.time_start > self.time_rem: raise TimeOut opponent = Board.get_opp_piece_type(colour) original_alpha = alpha dic = {self.player: 1, self.opponent: -1} move_to_try = None # check if the current board state is in the transposition table board_str = self.board.board_state.decode("utf-8") key = self.tt.contains(board_str, colour, phase=self.board.phase) if key is not None: # get the value mappings from the dictionary board_str = key[0] entry = self.tt.get_entry(board_str, colour) tt_value = entry[0] tt_type = entry[1] tt_best_move = entry[2] tt_depth = entry[3] # if we have found an entry in the transposition table, then the move # we should try first is this best move move_to_try = tt_best_move if tt_depth >= depth: # this is the PV node therefore this is the best move that we have found so far if tt_type == constant.TT_EXACT: return tt_value, tt_best_move # the minimax value in the transposition table is a lower bound to the search elif tt_type == constant.TT_LOWER: if tt_value > alpha: alpha = tt_value # the value in the table corresponds to a beta cutoff and therefore it is an upper bound for beta elif tt_type == constant.TT_UPPER: if tt_value < beta: beta = tt_value # test for cutoff -- return the best move found so far if alpha >= beta: return tt_value, tt_best_move # obtain the actions and sort them actions = self.board.update_actions(colour) actions = self.board.sort_actions(actions, colour) # terminal test -- default case if self.cutoff_test(depth): val = self.evaluate_state(self.board, self.player, actions) * dic[colour] return val, None # do the negamax search search best_val = -inf best_action = None # if we have found a best action to take in the transposition table, this should be the first # move we should try -- put this at the start of the list of actions if move_to_try is not None and move_to_try in actions: # put the move to try at the first position -- therefore it will be searched first actions = [move_to_try] + actions i = 0 # split the list of actions into favourable and unfavourable actions # we only consider to search teh favourable actions if the action list is long enough if len(actions) <= 12: favourable = actions elif 12 < len(actions) < 20: favourable = actions[:12] else: favourable = actions[:len(actions) // 2] # iterate only through the favourable moves, ensuring that the number of moves is not too big # the aim is to reduce the branching factor as much as we can, but also having enough moves to # evaluate such that we get the part of the optimality decision making from negamax/minimax # rather than a purely greedy approach. 
# print(len(favourable)) for action in favourable: # skip over the best action in the tt table -- this action has already be searched if action == move_to_try and i != 0: continue i += 1 # update the board, record the eliminated pieces from that update elim = self.board.update_board(action, colour) score, temp = self.negamax(depth - 1, -beta, -alpha, opponent) score = -score # undo the action applied to the board self.undo_action(action, colour, elim) # get the best score and action so far if score > best_val: best_val = score best_action = action # update alpha if needed if best_val > alpha: alpha = best_val # test for cut off if alpha >= beta: break # store the values in the transposition table if best_val <= original_alpha: # then this is an upper bound tt_type = constant.TT_UPPER elif best_val >= beta: # if the best value we have found is a lower bound tt_type = constant.TT_LOWER # print("LOWER") else: # this is the PV node value tt_type = constant.TT_EXACT # add the entry to the transposition table self.tt.add_entry(self.board.board_state, colour, best_val, tt_type, best_action, depth) return best_val, best_action # cut-off test -- either depth is zero or the board is at terminal state def cutoff_test(self, depth): if depth == 0: return True if self.is_terminal(): return True return False # evaluate the game state def evaluate_state(self, board, colour, actions): return self.evaluation.evaluate(board, colour, actions) # update the negamax board representation for another search def update_board(self, board): self.board = deepcopy(board) # terminal state check def is_terminal(self): return self.board.is_terminal() # undo board wrapper def undo_action(self, action, colour, elim): return self.board.undo_action(action, colour, elim)
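# Self-contained sketch of the transposition-table probe performed at the top of
# Negamax.negamax above. The flags "EXACT" / "LOWER" / "UPPER" stand in for
# constant.TT_EXACT / TT_LOWER / TT_UPPER, the function name is illustrative, and the
# logic simply mirrors how the stored bound is used to tighten alpha and beta.


def probe_tt_entry_sketch(entry, depth, alpha, beta):
    """entry is (value, flag, stored_depth); returns (cutoff_value_or_None, alpha, beta)."""
    value, flag, stored_depth = entry

    if stored_depth < depth:
        # the stored result comes from a shallower search -- usable only for move ordering
        return None, alpha, beta

    if flag == "EXACT":
        # exact minimax value for this position at sufficient depth: return it immediately
        return value, alpha, beta
    if flag == "LOWER":
        # the stored value is a lower bound on the true score
        alpha = max(alpha, value)
    elif flag == "UPPER":
        # the stored value is an upper bound on the true score
        beta = min(beta, value)

    if alpha >= beta:
        # the tightened window is empty, so the stored value already causes a cutoff
        return value, alpha, beta
    return None, alpha, beta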
def evaluate_state(self, board):
    # evaluate the board from the searching player's perspective using the basic policy
    return Evaluation.basic_policy(board, self.player)
class MinimaxABOptimised(object): def __init__(self, board, colour): # we want to create a node self.transposition_table = set() # only use this board to complete the search # save memory self.board = deepcopy(board) # for alpha beta search -- instead of passing it into the function calls we can use this self.alpha = -inf self.beta = inf # defines the colours of min and max self.player = colour self.opponent = Board.get_opp_piece_type(self.player) # default depth self.depth = inf # default move ordering with iterative deepening self.actions_evaluated = [] self.actions_leftover = [] # data structures for machine learning self.eval_depth = 0 self.minimax_val = 0 self.policy_vector = [] # dictionary storing the available moves of the board self.available_actions = { constant.WHITE_PIECE: {}, constant.BLACK_PIECE: {} } # generate the actions for the start of the game # self.generate_actions() self.undo_effected = [] self.time_alloc = 0 self.time_rem = 0 self.time_start = 0 self.time_end = 0 self.evaluation = Evaluation("./XML", "/eval_weights") ''' * Alpha Beta - Minimax Driver Function ''' def iterative_deepening_alpha_beta(self): ''' I dont think this is working correctly -- i believe when things are getting cached because it doesnt take in consideration the depth of the call of that minimax evaluation we need to take into consideration the depth for it to call correctly need to change this ''' MAX_ITER = 100 # default policy available_actions = self.board.update_actions(self.board, self.player) # self.actions_leftover = self.board.update_actions(self.board, self.player) if len(available_actions) == 0: return None else: # lets just set the default to the first move move = available_actions[0] # time allocated per move in ms self.time_alloc = 0 if self.board.phase == constant.PLACEMENT_PHASE: self.time_alloc = 500 else: self.time_alloc = 1000 # get time start_time = MinimaxABOptimised.curr_millisecond_time() best_depth = 1 # iterative deepening begins here for depth in range(1, MAX_ITER): print(depth) # invalidate / clear the cache when increasing the search depth cutoff self.min_value.cache_clear() try: #self.max_value.cache_clear() # peform the search self.time_rem = self.time_alloc self.time_start = self.curr_millisecond_time() move = self.alpha_beta_minimax(depth, available_actions) print(move) self.time_end = self.curr_millisecond_time() self.time_rem = self.time_alloc - (self.time_end - self.time_start) # after one iteration of ab search we can order the moves based on the actions that # the previous depth evaluated the actions at available_actions = [] while len(self.actions_evaluated) > 0: (val, action) = heapq._heappop_max(self.actions_evaluated) available_actions.append(action) # transform the heap into a max heap heapq._heapify_max(self.actions_evaluated) # update the available_actions list available_actions = available_actions + self.actions_leftover best_depth += 1 except TimeOut: print("TIMEOUT") break if MinimaxABOptimised.curr_millisecond_time( ) - start_time > self.time_alloc: break self.eval_depth = best_depth return move def set_player_colour(self, colour): self.player = colour self.opponent = Board.get_opp_piece_type(colour) @staticmethod def curr_millisecond_time(): return int(time() * 1000) def alpha_beta_minimax(self, depth, available_actions): # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - self.time_start > self.time_rem: raise TimeOut self.actions_evaluated = [] if self.board.phase == constant.MOVING_PHASE and 
self.board.move_counter == 0: self.min_value.cache_clear() # self.max_value.cache_clear() best_move = None alpha = -inf evaluate = -inf beta = inf # get the available moves of the board (based on the current board representation) # we can generate the actions as we wish -- this can easily change -- TODO : OPTIMISATION/ PRUNING OF ACTION __ CAN BE GREEDY __ favoured moves and unfavoured moves self.actions_leftover = self.board.update_actions( self.board, self.player) # self.actions_leftover = self.board.update_actions(self.board,self.player) for action in available_actions: # update the minimax board representation with the action self.board.update_board(action, self.player) # get the board representation for caching board_string = self.board.board_state.decode("utf-8") try: ab_evaluate = self.min_value(board_string, self.opponent, self.board.phase, depth - 1) except TimeOut: raise TimeOut heapq.heappush(self.actions_evaluated, (ab_evaluate, action)) self.actions_leftover.remove(action) if ab_evaluate > evaluate: best_move = action evaluate = ab_evaluate # undo the move self.undo_effected = self.undo_move() if evaluate >= beta: self.minimax_val = evaluate return best_move alpha = max(alpha, evaluate) self.minimax_val = evaluate return best_move # memoize the function call -- opitimisation #@lru_cache(maxsize=10000) def max_value(self, board_string, colour, phase, depth): # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - self.time_start > self.time_rem: raise TimeOut evaluate = -inf if self.cutoff_test(depth): return self.evaluate_state(self.board) # visit each available move available_actions = self.board.update_actions(self.board, colour) for action in available_actions: # update the board representation with the move self.board.update_board(action, colour) # create an immutable object for board_string such that we can call lru_cache on the max function call board_string = self.board.board_state.decode("utf-8") # get the minimax value for this state try: evaluate = max( evaluate, self.min_value(board_string, self.opponent, self.board.phase, depth - 1)) except TimeOut: raise TimeOut # undo the move so that we can apply another action self.undo_effected = self.undo_move() if evaluate >= self.beta: return evaluate self.alpha = max(evaluate, self.alpha) return evaluate # memoize the min value results -- optimisation of its function call @lru_cache(maxsize=100000) def min_value(self, board_string, colour, phase, depth): # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - self.time_start > self.time_rem: raise TimeOut # beginning evaluation value evaluate = inf if self.cutoff_test(depth): return self.evaluate_state(self.board) # generate the actions to search on available_actions = self.board.update_actions(self.board, colour) for action in available_actions: # update the board representation -- this action is the min nodes's action self.board.update_board(action, colour) board_string = self.board.board_state.decode("utf-8") # find the value of the max node try: evaluate = min( evaluate, self.max_value(board_string, self.player, self.board.phase, depth - 1)) except TimeOut: raise TimeOut # undo the board move so that we can apply another move # -- we also go up a level therefore we need to increment depth self.undo_effected = self.undo_move() ''' if beta <= alpha: # when we break from the loop make sure to undo the move break ''' if evaluate <= self.alpha: return evaluate self.beta = min(self.beta, evaluate) return evaluate def 
cutoff_test(self, depth): if depth == 0: return True if self.is_terminal(): return True return False ''' * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended -- Need to work out some optimisations of the algorithm though ''' def evaluate_state(self, board): #return Evaluation.basic_policy(board, self.player) return self.evaluation.evaluate(board, self.player) # update the available moves of the search algorithm after it has been instantiated # # def update_available_moves(self, node, available_moves): # node.available_moves = available_moves def update_board(self, board): self.board = deepcopy(board) def is_terminal(self): return self.board.is_terminal() def check_symmetry(self, board_state): transformation = MinimaxABUndo.apply_horizontal_reflection(board_state) board = deepcopy(board_state) if transformation.decode("utf-8") in self.visited: return True else: self.visited.add(board.decode("utf-8")) return False @staticmethod def apply_horizontal_reflection(board_state): temp = '' for index in range(constant.BOARD_SIZE**2): temp += constant.FREE_SPACE temp = bytearray(temp, 'utf-8') for row in range(constant.BOARD_SIZE): for col in range(constant.BOARD_SIZE): Board.set_array_char( temp, 7 - row, 7 - col, Board.get_array_element(board_state, row, col)) # print(temp) # print(board_state) return temp def undo_move(self): return self.board.undo_move() # then we need to recalculate the available moves based on the board representation # self.generate_actions() ''' ################################################################################# # METHODS FOR THE DICTIONARY REPRESENTATION OF THE AVAILABLE MOVES ON THE BOARD # # # # # ################################################################################ ''' # we update the available actions when we update the board representation def generate_actions(self): if self.board.phase == constant.PLACEMENT_PHASE: self.init_available_placement_actions() self.start_available_actions_placement() elif self.board.phase == constant.MOVING_PHASE: #print(self.board.piece_pos) #print("dsfsf") self.init_available_moving_actions() def init_available_placement_actions(self): # initialise the dictionary with the available placements on the board for row in range(constant.BOARD_SIZE): for col in range(constant.BOARD_SIZE): piece = col, row # print(col,row) for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): if Board.within_starting_area(piece, colour): temp = {piece: constant.PLACEMENT_PHASE} # print(temp) self.available_actions[colour].update(temp) def start_available_actions_placement(self): # get rid of all pieces that exist on the board for colour in (constant.BLACK_PIECE, constant.WHITE_PIECE): for piece in self.board.piece_pos[colour]: if piece in self.available_actions[constant.WHITE_PIECE]: if Board.within_starting_area(piece, constant.WHITE_PIECE): self.available_actions[constant.WHITE_PIECE].pop(piece) if Board.within_starting_area(piece, constant.BLACK_PIECE): self.available_actions[constant.BLACK_PIECE].pop(piece) def init_available_moving_actions(self): # clear the dictionary self.available_actions = { constant.WHITE_PIECE: {}, constant.BLACK_PIECE: {} } for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): for piece in self.board.piece_pos[colour]: #print(piece) self.update_actions_dict_entry(piece, colour) # need to ensure that we call this after an update to the minimax board 
representation def update_available_moves(self, action, colour): # if there were any eliminated pieces last move retrieve them from the stack -- but make sure not to pop them # off the stack completely eliminated_pieces = self.board.eliminated_pieces_last_move( self.board.phase, self.board.move_counter, pop=False) # action is in the form (position, movetype) # -- i,e. we are moving the piece at position by the movetype # -- when an action is called we have move that piece already and we need to change # -- the entries in the dictionary according to that move # colour is the colour of the piece we have moved # read in the pieces on the board -- if they already exist in the dictionary # then we dont need to do anything -- if they don't exist in the dictionary # need to look at all the eliminated pieces on the board # -- look for pieces in the vicinity of that space # -- delete keys associated with those eliminated pieces as these are pieces on the board # -- that do not exists anymore, therefore there are no associated moves with this piece # -- update the available moves of the pieces that can move into that square # need to update the available moves of the piece at its new location # delete entry in the dictionary that corresponds to the old position old_pos = action[0] #print(old_pos) #print(action) new_pos = Board.convert_move_type_to_coord(old_pos, action[1]) # first we need to update the dictionary by removing the old piece from the # dictionary -- as this is not an available move anymore if old_pos in self.available_actions[colour]: #print("old") self.available_actions[colour].pop(old_pos) else: pass # need to raise an error saying # then add an entry into the dictionary corresponding to the new location of the piece # after the move has been applied if new_pos not in self.available_actions[colour]: self.update_actions_dict_entry(new_pos, colour) else: pass # need to raise an error # remove all eliminated pieces from the dictionary for piece_type in (constant.WHITE_PIECE, constant.BLACK_PIECE): for piece in eliminated_pieces[piece_type]: if piece in self.available_actions[piece_type]: self.available_actions[piece_type].pop(piece) else: pass # need to raise an error # update any piece that is surrounding the old position but also any eliminated pieces and update # their available moves by adding the corresponding move type to that list # this old position is now a free space on the board and therefore pieces are able to now move into it # need to test all positions surround this newly freed space and update their available actions for move_type in range(constant.MAX_MOVETYPE): # iterate through all the possible moves at the old location, checking # whether or not there is a piece there # if there is a piece at that location we can update that piece's available moves piece = Board.convert_move_type_to_coord(old_pos, move_type) for piece_colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): if piece in self.available_actions[piece_colour]: if move_type < 4: self.update_actions_dict_entry(piece, piece_colour) else: if self.board.can_jump_into_position( old_pos, move_type): self.update_actions_dict_entry(piece, piece_colour) # update the pieces around any eliminated pieces for piece_colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): # iterate through all the eliminated pieces on the board for elim_piece in eliminated_pieces[piece_colour]: # for each eliminated piece we apply a move (move_type to it), checking if there is a piece # at this position on the board, we do this by 
checking the available moves dictionary # if there is a piece associated with that position on the board then if it is a one step move # we just need to update that pieces available moves, if it is a jump, then we need to test if there # is an adjacent piece between the jump and the free space -- do this by calling # can_jump_into_position -- for a given space, if we apply a move_type corresponding to a # two piece move, can we jump into this free spot # if we can then we just need to update this pieces available actions piece = Board.convert_move_type_to_coord( elim_piece, move_type) ''' # if this piece corresponds to an entry in the dictionary, then there is a piece at this location if piece in self.available_actions[piece_colour]: # one step moves if move_type < 4: self.update_actions_dict_entry(piece,piece_colour) else: # need to check if a jump is available into the free space # if the piece at the jump location is in the available_action dict if self.board.can_jump_into_position(elim_piece,move_type): self.update_actions_dict_entry(piece,piece_colour) ''' self.update_surrounding_pieces(piece) # update the available moves of the pieces that surround where the # new position of the piece is -- this is no longer an occupied space therefore pieces surrounding # it cannot move into this space anymore piece = Board.convert_move_type_to_coord(new_pos, move_type) for piece_colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): if piece in self.available_actions[piece_colour]: ''' if move_type < 4: self.update_actions_dict_entry(piece,piece_colour) else: # treat this old position as a free space -- if there are pieces # that can jump into this piece we have to update these pieces available # actions because this space is no longer free if self.board.can_jump_into_position(new_pos,move_type): self.update_actions_dict_entry(piece,piece_colour) ''' self.update_surrounding_pieces(piece) # HELPER METHOD THAT ALLOWS TO UPDATE A PARTICULAR PIECES AVAILABLE ACTIONS IN THE DICTIONARY def update_actions_dict_entry(self, piece, colour): temp_list = self.get_piece_legal_moves(piece) update_entry = {piece: temp_list} self.available_actions[colour].update(update_entry) # get a list of the legal moves of a particular piece def get_piece_legal_moves(self, piece): available_moves = [] for move_type in range(constant.MAX_MOVETYPE): if self.board.is_legal_move(piece, move_type): available_moves.append(move_type) #print(available_moves) return available_moves def update_available_placement(self, action): # to update the available actions in the placement phase we just need to read in the action made # remove this entry from the dictionary # add the entries of any eliminated positions in the dictionary elim = [] eliminated_pieces = self.board.eliminated_pieces_last_move( self.board.phase, self.board.move_counter, pop=False) #print("ELIMINATED: ",end='') #print(eliminated_pieces) #print("AVAILABLE: ",end='') #print(self.available_actions) for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): if Board.within_starting_area(action, colour): # remove the action from the entry of the dictionary if action in self.available_actions[colour]: self.available_actions[colour].pop(action) # add all the eliminated pieces to the available moves of the dictionary for piece in eliminated_pieces[colour]: elim.append(piece) for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE): for piece in elim: if Board.within_starting_area(piece, colour): update_entry = {piece: constant.PLACEMENT_PHASE} 
self.available_actions[colour].update(update_entry) def update_available_actions(self, action, colour): if self.board.phase == constant.PLACEMENT_PHASE: self.update_available_placement(action) elif self.board.phase == constant.MOVING_PHASE: if self.board.move_counter == 0: self.update_available_placement(action) else: self.update_available_moves(action, colour) # return a list of actions corresponding to a particular board state def get_actions(self, colour): actions = [] if self.board.phase == constant.PLACEMENT_PHASE: for key in self.available_actions[colour].keys(): # return a list containing the free spaces on the board that a player can place a piece into actions.append(key) return actions elif self.board.phase == constant.MOVING_PHASE: if self.board.move_counter == 0: self.init_available_moving_actions() for key in self.available_actions[colour].keys(): for move_type in self.available_actions[colour][key]: # return a list of the piece_position and the move it can make actions.append((key, move_type)) return actions ''' This method is only called after an undo_move call -- this is because undo move will set the pieces_effected_undo attribute to being a list This list will contain all pieces that have been effected when the undo move is called Therefore when we are restoring the available actions lists after the undo_move call, we just need to update the entries that have been affected by the undo move pieces that have been effected by an undo move are: - any eliminated piece -- this position is now a free space on the board - therefore after an undo call is made -- these pieces should now be placed back onto the available actions list - The list is in the form (action, colour, undo_type) - Undo type tells us what type of piece has been effected by an undo -- and what was that location of the board before an undo - constant.PLACE_LOC -- we have placed a piece here, therefore to establish the old state, when we called undo move, we got rid of this piece from this board, thus to reestablish the old available moves we just need to add this position (if valid) into the dictionary of the pieces - constant.ELIMINATED_LOC -- a piece has been eliminated at this location previously, therefore when we undo a move, this piece is now occupied again. Therefore we need to update the pieces that surround it (if in the moving phase) or remove this piece from the dictionary if we are in the placement phase. 
- constant.PIECE_OLD_LOC -- relates to the moving phase: we have moved a piece from this position to a new position therefore in the original available actions list, this action should be removed from the dictionary and we need to update any pieces that surround this piece - constant.PIECE_NEW_LOC -- this relates to the moving phase: we have moved a piece from an old location to this location, therefore this position should not exist in the old dictionary, thus we need to add it back to the old dictionary and update any surrounding pieces considering the edge cases -- - shrinking corners: this should already be handled by the undo_move function - all pieces that have been eliminated due to a shrink should be in the effected list - PLACEMENT->MOVING transition - treat the effected pieces as placement phase pieces -- might be worth just revaluating the board completely here - when we are undoing a change from moving to placement phase -- undo already changes the phase and moving counter so this should not be an issue ''' def undo_available_placement(self): # we just need to pop each piece from the undo_moves effected pieces while len(self.undo_effected) > 0: action = self.undo_effected.pop() #print("POP") #print(action) loc = action[0] #print(loc) colour = action[1] undo_type = action[2] opponent = Board.get_opp_piece_type(colour) if undo_type == constant.ELIMINATED_PIECE: # this piece was eliminated before the undo move, now we have placed it back on the board with undo if loc in self.available_actions[colour]: # remove the action from the dictionary of the corresponding colour self.available_actions[colour].pop(loc) if loc in self.available_actions[opponent]: self.available_actions[opponent].pop(loc) elif undo_type == constant.PLACE_LOC: # a piece was was placed at this location at prior to calling undo move # therefore to reestablish the original available moves list, then we need to add # this piece to the corresponding dict if loc not in self.available_actions[colour] and loc not in\ self.available_actions[opponent]: # if we can place a piece at this location again -- then this piece corresponds to a free space if self.board.within_starting_area(loc, colour): temp = {loc: constant.PLACEMENT_PHASE} self.available_actions[colour].update(temp) if self.board.within_starting_area(loc, opponent): temp = {loc: constant.PLACEMENT_PHASE} self.available_actions[opponent].update(temp) def undo_available_moves(self): for tup in self.undo_effected: #print(tup) loc = tup[0] colour = tup[1] undo_type = tup[2] # get rid of relevent entries in the dictionary if undo_type == constant.PIECE_OLD_LOC: # if it is an old location it currently does not exist in the dictionary since it was deleted when # it was updated # add it back self.update_actions_dict_entry(loc, colour) self.update_surrounding_pieces(loc) elif undo_type == constant.PIECE_NEW_LOC: #print(loc) # if it is a new location it currently exists in the dictionary, and we must remove it if loc in self.available_actions[colour]: self.available_actions[colour].pop(loc) self.update_surrounding_pieces(loc) elif undo_type == constant.ELIMINATED_PIECE: # if there were eliminated pieces that were put back onto the board in the undo move -- then these # pieces would not exist in the current available move dictionary self.update_actions_dict_entry(loc, colour) self.update_surrounding_pieces(loc) # clear the undo-effected list self.undo_effected = [] # given a center_position -- update the pieces that surround that centre position if they exist def 
update_surrounding_pieces(self, center_pos): for move_type in range(constant.MAX_MOVETYPE): potential_piece = Board.convert_move_type_to_coord( center_pos, move_type) # check if the potential piece is a piece if potential_piece in self.available_actions[constant.WHITE_PIECE]: # then it is a piece on the board # update this piece self.update_actions_dict_entry(potential_piece, constant.WHITE_PIECE) elif potential_piece in self.available_actions[ constant.BLACK_PIECE]: self.update_actions_dict_entry(potential_piece, constant.BLACK_PIECE) def restore_available_actions(self): if self.board.phase == constant.PLACEMENT_PHASE: self.undo_available_placement() elif self.board.phase == constant.MOVING_PHASE: self.undo_available_moves() def alpha_beta(self, depth): self.generate_actions() if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0: self.min_value.cache_clear() # self.max_value.cache_clear() best_move = None alpha = -inf evaluate = -inf beta = inf # get the available moves of the board (based on the current board representation) # we can generate the actions as we wish -- this can easily change -- TODO : OPTIMISATION/ PRUNING OF ACTION __ CAN BE GREEDY __ favoured moves and unfavoured moves # self.actions_leftover = self.board.update_actions(self.board,self.player) available_actions = self.get_actions(self.player) for action in available_actions: # update the minimax board representation with the action self.board.update_board(action, self.player) self.update_available_actions(action, self.player) # get the board representation for caching board_string = self.board.board_state.decode("utf-8") ab_evaluate = self.min_v(board_string, self.opponent, self.board.phase, depth - 1) if ab_evaluate > evaluate: best_move = action evaluate = ab_evaluate # undo the move self.undo_effected = self.undo_move() self.restore_available_actions() if evaluate >= beta: self.minimax_val = evaluate return best_move alpha = max(alpha, evaluate) self.minimax_val = evaluate return best_move # memoize the function call -- opitimisation #@lru_cache(maxsize=10000) def max_v(self, board_string, colour, phase, depth): evaluate = -inf if self.cutoff_test(depth): return self.evaluate_state(self.board) # visit each available move available_actions = self.get_actions(colour) for action in available_actions: #print(action) #print(self.board.move_counter, self.board.phase) # update the board representation with the move self.board.update_board(action, colour) self.update_available_actions(action, colour) # create an immutable object for board_string such that we can call lru_cache on the max function call board_string = self.board.board_state.decode("utf-8") # get the minimax value for this state evaluate = max( evaluate, self.min_v(board_string, self.opponent, self.board.phase, depth - 1)) # undo the move so that we can apply another action self.undo_effected = self.undo_move() self.restore_available_actions() if evaluate >= self.beta: return evaluate self.alpha = max(evaluate, self.alpha) return evaluate # memoize the min value results -- optimisation of its function call @lru_cache(maxsize=1000) def min_v(self, board_string, colour, phase, depth): # beginning evaluation value evaluate = inf if self.cutoff_test(depth): return self.evaluate_state(self.board) # generate the actions to search on available_actions = self.get_actions(colour) for action in available_actions: # update the board representation -- this action is the min nodes's action self.board.update_board(action, colour) 
self.update_available_actions(action, colour) board_string = self.board.board_state.decode("utf-8") # find the value of the max node evaluate = min( evaluate, self.max_v(board_string, self.player, self.board.phase, depth - 1)) # undo the board move so that we can apply another move # -- we also go up a level therefore we need to increment depth self.undo_effected = self.undo_move() self.restore_available_actions() if evaluate <= self.alpha: return evaluate self.beta = min(self.beta, evaluate) return evaluate
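# Note on the caching strategy above: lru_cache keys min_v on (self, board_string, colour,
# phase, depth), but the value returned also depends on self.alpha and self.beta, which are
# not part of the key, and caching a bound method keeps the instance alive for the lifetime
# of the cache. A minimal sketch of an explicit, per-search memo keyed only on hashable
# search state (class and attribute names here are hypothetical, not from the original):


class _SearchMemoSketch:
    def __init__(self):
        self._memo = {}

    def clear(self):
        # analogous to min_value.cache_clear() before each deepening iteration
        self._memo.clear()

    def lookup(self, board_string, colour, phase, depth):
        # return the cached value, or None if this position has not been searched yet
        return self._memo.get((board_string, colour, phase, depth))

    def store(self, board_string, colour, phase, depth, value):
        self._memo[(board_string, colour, phase, depth)] = value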
class Negamax(object): def __init__(self, board, colour): # we want to create a node self.tt = TranspositionTable() # only use this board to complete the search # save memory self.board = deepcopy(board) # for alpha beta search -- instead of passing it into the function calls we can use this self.alpha = -inf self.beta = inf # defines the colours of min and max self.player = colour self.opponent = Board.get_opp_piece_type(self.player) # default depth self.depth = inf # default move ordering with iterative deepening self.actions_evaluated = [] self.actions_leftover = [] # data structures for machine learning self.eval_depth = 0 self.minimax_val = 0 self.policy_vector = [] # dictionary storing the available moves of the board self.available_actions = { constant.WHITE_PIECE: {}, constant.BLACK_PIECE: {} } # generate the actions for the start of the game # self.generate_actions() self.undo_effected = [] self.time_alloc = 0 self.time_rem = 0 self.time_start = 0 self.time_end = 0 self.evaluation = Evaluation("./XML", "/eval_weights") ''' * Alpha Beta - Minimax Driver Function ''' def itr_negamax(self): # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0: #if self.board.phase == constant.PLACEMENT_PHASE: self.tt.clear() MAX_ITER = 10 # default policy available_actions = self.board.update_actions(self.board, self.player) print(len(available_actions)) action_set = set(available_actions) # self.actions_leftover = self.board.update_actions(self.board, self.player) if len(available_actions) == 0: return None #else: # lets just set the default to the first move # move = available_actions[0] # time allocated per move in ms ''' self.time_alloc = 0 if self.board.phase == constant.PLACEMENT_PHASE: self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter) else: self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter) ''' # self.time_alloc = 5000 # time allocated per move in ms self.time_alloc = 0 total = 120000 if self.board.phase == constant.PLACEMENT_PHASE: #self.time_alloc = (total/2 - self.time_alloc) / (24 - self.board.move_counter) #total -= self.time_alloc self.time_alloc = 1000 else: #self.time_alloc = (total - self.time_alloc) / (100 - self.board.move_counter) #total -= self.time_alloc self.time_alloc = 1000 # get time start_time = Negamax.curr_millisecond_time() best_depth = 1 val, move = 0, None # iterative deepening begins here best_move = None for depth in range(1, MAX_ITER): print(self.tt.size) print(depth) try: self.time_rem = self.time_alloc self.time_start = self.curr_millisecond_time() val, move = self.negamax(depth, -inf, inf, self.player) self.time_end = self.curr_millisecond_time() self.time_rem = self.time_alloc - (self.time_end - self.time_start) print(move) best_depth += 1 # if we have a move that is not none lets always pick that move that is legal # becuase we are doing a greedy search -- it sometimes returns an illegal move, not too sure why # therefore here we check if a move is legal as well if move is not None and move in action_set: best_move = move except TimeOut: print("TIMEOUT") break if Negamax.curr_millisecond_time() - start_time > self.time_alloc: break self.eval_depth = best_depth return best_move def set_player_colour(self, colour): self.player = colour self.opponent = Board.get_opp_piece_type(colour) @staticmethod def curr_millisecond_time(): return int(time() * 1000) # naive Negamax (depth 
limited) -- No Transposition Table def negamax(self, depth, alpha, beta, colour): # Timeout handling self.time_end = self.curr_millisecond_time() if self.time_end - self.time_start > self.time_rem: raise TimeOut opponent = Board.get_opp_piece_type(colour) dic = {self.player: 1, self.opponent: -1} # generate legal actions actions_1 = self.board.update_actions(self.board, colour) # print(len(actions)) actions = self.board.sort_actions(actions_1, colour) # terminal test -- default case if self.cutoff_test(depth): val = self.evaluate_state(self.board, self.player, actions) * dic[colour] return val, None # do the minimax search best_val = -inf best_action = None # generate legal actions #actions = self.board.update_actions(self.board, colour) # split the actions into favourable an unfavourable # if the length of actions is greater than X, then we can just choose to look through the first # 5 'favourable' actions that we see right now # if the length of actions is less than X, then we can just evaluate all possible actions we have # THIS IS A GREEDY APPROACH TO MINIMAX THAT LIMITS OUR BRANCHING FACTOR OF THE GAME if len(actions) > 8: favourable = actions[:8] else: favourable = actions # got here #print("got here") # depth reduction R = 2 #print(favourable) #self.board.print_board() for action in favourable: self.board.update_board(action, colour) if action in favourable: score, temp = self.negamax(depth - 1, -beta, -alpha, opponent) else: score, temp = self.negamax(depth - 1 - R, -beta, -alpha, opponent) score = -score if score > best_val: best_val = score best_action = action if score > alpha: alpha = score self.undo_move() if alpha >= beta: break return best_val, best_action def cutoff_test(self, depth): if depth == 0: return True if self.is_terminal(): return True return False ''' * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended -- Need to work out some optimisations of the algorithm though ''' def evaluate_state(self, board, colour, actions): #return Evaluation.basic_policy(board,colour) return self.evaluation.evaluate(board, colour, actions) # update the available moves of the search algorithm after it has been instantiated # # def update_available_moves(self, node, available_moves): # node.available_moves = available_moves def update_board(self, board): self.board = deepcopy(board) def is_terminal(self): return self.board.is_terminal() def undo_move(self): return self.board.undo_move()
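# In the greedy negamax above, R = 2 is declared as a depth reduction, but the loop only
# iterates over `favourable`, so the `depth - 1 - R` branch can never be taken. A sketch of
# how the intended split would look inside that loop (it mirrors the class's own
# update_board / negamax / undo_move calls; shown as a comment because it only makes sense
# in the context of that method):
#
#     for action in actions:
#         self.board.update_board(action, colour)
#         if action in favourable:
#             score, _ = self.negamax(depth - 1, -beta, -alpha, opponent)      # full depth
#         else:
#             score, _ = self.negamax(depth - 1 - R, -beta, -alpha, opponent)  # reduced depth
#         score = -score
#         self.undo_move()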
class Negamax(object):

    def __init__(self, board, colour):
        # transposition table for this search instance
        self.tt = TranspositionTable()

        # search on a deep copy of the board so we never mutate the real game state
        self.board = deepcopy(board)

        # alpha-beta bounds -- stored on the instance instead of being threaded through every call
        self.alpha = -inf
        self.beta = inf

        # colours of the maximising and minimising players
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default (unbounded) search depth
        self.depth = inf

        # move-ordering bookkeeping for iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data recorded for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        self.undo_effected = []

        # timing bookkeeping (milliseconds)
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    Iterative-deepening Negamax driver function
    '''
    def itr_negamax(self):
        # clear the transposition table before every new search so it does not grow too large
        self.tt.clear()

        # cap on the number of iterative-deepening iterations
        MAX_ITER = 10

        # update the root piece counts every time we search from a new root node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # generate the legal actions at the root; keep a set for fast legality checks later
        available_actions = self.board.update_actions(self.player)
        action_set = set(available_actions)

        if len(available_actions) == 0:
            return None

        # time allocated per move in ms
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 3000
        else:
            self.time_alloc = 800

        best_depth = 1
        val, move = 0, None
        best_move = None

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()
                self.time_rem = self.time_alloc - (self.time_end - self.time_start)
                print(move)
                best_depth += 1

                # the search can occasionally return an illegal move, so only keep a move
                # that is both non-None and legal at the root
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                break

        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive depth-limited Negamax -- no transposition table lookups
    def negamax(self, depth, alpha, beta, colour):
        # timeout handling: abandon the search if this iteration has used up its time budget
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)

        # sign applied to the evaluation so that scores are always from the mover's perspective
        dic = {self.player: 1, self.opponent: -1}

        # generate the legal actions
        actions = self.board.update_actions(colour)

        # terminal test -- cutoff depth reached or the game is over
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
            return val, None

        best_val = -inf
        best_action = None

        for action in actions:
            # apply the action, recurse with the window negated and swapped, then undo
            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            self.undo_action(action, colour, elim)
            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            if alpha >= beta:
                break

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True
        if self.is_terminal():
            return True
        return False

    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_action(self, action, colour, elim_pieces):
        self.board.undo_action(action, colour, elim_pieces)
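# Minimal stand-ins for the two helper types used by both Negamax variants but defined
# elsewhere in the project. These are sketches based only on how they are used here
# (tt.clear(), tt.size, and raising/catching TimeOut); they are not the project's real
# implementations.

class TimeOut(Exception):
    """Raised inside negamax() once the per-move time budget is exhausted."""
    pass


class TranspositionTable(object):
    """Dict-backed table mapping a hashed board state to a stored search result."""

    def __init__(self):
        self.table = {}

    @property
    def size(self):
        # number of stored entries -- printed by itr_negamax() for debugging
        return len(self.table)

    def clear(self):
        self.table.clear()

    def store(self, key, value):
        self.table[key] = value

    def lookup(self, key):
        # returns None when the position has not been seen before
        return self.table.get(key)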