Example #1
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0
        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")
Example #2
    def sort_actions(self, actions):
        # order the actions so that the most promising moves (according to the
        # lightweight basic policy) are searched first
        action_heap = []
        result = []
        for action in actions:
            self.board.update_board(action, self.player)

            val = Evaluation.basic_policy(self.board, self.player)
            self.undo_move()

            # push the negated score so the standard min-heap pops the
            # highest-scoring action first
            heapq.heappush(action_heap, (-val, action))

        while action_heap:
            result.append(heapq.heappop(action_heap)[1])
        return result
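
Below is a small, self-contained sketch (not from the original source) of the same move-ordering idea written with sorted(): score each candidate action with a cheap heuristic and visit the highest-scoring actions first. The toy actions and the centre_bias heuristic are invented to make the example runnable; in the snippet above the score comes from Evaluation.basic_policy applied to the updated board.

def order_actions(actions, score):
    # highest heuristic score first
    return sorted(actions, key=score, reverse=True)


def centre_bias(square):
    # toy heuristic: prefer squares nearer the centre of an 8x8 board
    return -(abs(square[0] - 3.5) + abs(square[1] - 3.5))


if __name__ == "__main__":
    candidate_actions = [(0, 0), (3, 3), (7, 7), (3, 4)]
    print(order_actions(candidate_actions, centre_bias))  # central squares come first
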
Example #3
class Negascout(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0
        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negascout(self):
        colour = self.player
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        if self.board.phase == constant.PLACEMENT_PHASE:
            # clear the transposition table every time we want to evaluate a move in placement phase
            # this is to limit the size of growth
            self.tt.clear()

            # set the max depth iterations based on the phase that we are in
            MAX_ITER = 5
        else:
            MAX_ITER = 11

        # update the root number of pieces every time we do a search on a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(colour)
        action_set = set(available_actions)

        if len(available_actions) == 0:
            return None

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1500
        else:
            self.time_alloc = 1200

            # if we have used most of our total time or the game has gone past 120 moves, reduce the time per move
            if self.total_time > 90000 or self.board.move_counter > 120:
                self.time_alloc = 500
                # if we are near the final shrinking phase, then we can decrease the time it has to
                # evaluate
                if self.board.move_counter > 150:
                    self.time_alloc = 150

        best_depth = 1
        val, move = 0, None
        best_move = None
        self.time_rem = self.time_alloc

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:

                self.time_start = self.curr_millisecond_time()
                val, move = self.negascout(depth, -inf, inf, self.player)
                # move = self.negascout(depth,self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1

                if move is not None and move in action_set:
                    best_move = move

            except TimeOut:
                print("TIMEOUT")
                break
        # add the time allocated to the total time
        self.total_time += self.time_alloc
        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def negascout(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}
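        # sign convention: evaluate_state scores the board from self.player's point of view,
        # so multiplying by dic[colour] (+1 or -1) re-expresses that score from the
        # perspective of the side to move, as negamax/negascout requires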

        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move
            #print(move_to_try)
            #print("FOUND ENTRY IN TT")

            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    #print("FOUND PV")
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        #print("FOUND FAIL SOFT")
                        alpha = tt_value

                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        #print("FOUND FAIL HARD")
                        beta = tt_value

                if alpha >= beta:
                    return tt_value, tt_best_move

        actions = self.board.update_actions(colour)
        # actions = actions_1
        actions = self.board.sort_actions(actions, colour)
        #actions = actions_1
        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None

        if move_to_try is not None and move_to_try in actions:
            #print("MOVE ORDERING")
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions

        i = 0
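        # greedy pruning: the actions are already sorted by the lightweight policy above,
        # so keeping only a prefix of the list caps the branching factor while still
        # searching the moves we expect to matter most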
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]
        # print(len(actions))

        # start negascout here
        for i, action in enumerate(favourable):
            # skip over the best action in the tt table
            if action == move_to_try and i > 0:
                continue

            elim = self.board.update_board(action, colour)

            # if we are at the first node -- this is the best node we have found so far
            # therefore we do a full search on this node
            if i == 0:
                # do a full search on the best move found so far
                score, _ = self.negascout(depth - 1, -beta, -alpha, opponent)
                score = -score

            else:
                # assume the first move is the best move we have found so far; to test this
                # we run a null window search on the remaining moves. If the null window
                # search fails low, the first move really is the best and we keep it.
                # But if the search "failed high" -- i.e. the score lies between alpha and
                # beta -- we need to do a full re-search of the node to work out its true
                # minimax value.

                # do the null window search
                score, _ = self.negascout(depth - 1, -alpha - 1, -alpha,
                                          opponent)
                score = -score

                # if it failed high, then we just do a full search to find the actual best move
                if alpha < score < beta:
                    score, _ = self.negascout(depth - 1, -beta, -score,
                                              opponent)
                    score = -score

            # get the best value and score
            if best_val < score:
                best_val = score
                best_action = action

            # reset alpha
            if alpha < score:
                alpha = score

            # undo the action applied to the board -- we can now apply another move to the board
            self.undo_actions(action, colour, elim)

            # test for alpha beta cutoff
            if alpha >= beta:
                break

        # store the values in the transposition table
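        # a fail-low result (best_val <= original alpha) only establishes an upper bound on
        # the true value, a fail-high result (best_val >= beta) only a lower bound; a value
        # strictly inside the window is exact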
        if best_val <= original_alpha:
            # then this is an upperbound -FAILHARD
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            tt_type = constant.TT_EXACT
            # print("EXACT")

        # add the entry to the transposition table
        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES 
            -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended 
            -- Need to work out some optimisations of the algorithm though 

    '''

    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_actions(self, action, colour, elim):
        return self.board.undo_action(action, colour, elim)
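
Below is a minimal, self-contained sketch (not from the original source) of the principal variation / null window idea that the negascout method above relies on: the first child is searched with the full (alpha, beta) window, every other child gets a cheap null-window probe, and only a probe that fails high triggers a full re-search. The TreeNode class, the pvs function and the toy tree are invented purely for illustration.

from math import inf


class TreeNode:
    def __init__(self, value=0, children=()):
        self.value = value              # leaf value from the maximising player's point of view
        self.children = list(children)


def pvs(node, depth, alpha, beta, colour):
    # colour is +1 when the maximising player is to move, -1 otherwise
    if depth == 0 or not node.children:
        return colour * node.value

    best = -inf
    for i, child in enumerate(node.children):
        if i == 0:
            # full-window search on the first (assumed best) child
            score = -pvs(child, depth - 1, -beta, -alpha, -colour)
        else:
            # null-window probe: only asks whether this child beats alpha
            score = -pvs(child, depth - 1, -alpha - 1, -alpha, -colour)
            if alpha < score < beta:
                # the probe failed high -- re-search with a proper window
                score = -pvs(child, depth - 1, -beta, -score, -colour)
        best = max(best, score)
        alpha = max(alpha, score)
        if alpha >= beta:
            break                       # beta cutoff
    return best


if __name__ == "__main__":
    leaves = [TreeNode(v) for v in (3, 5, 2, 9, 0, 7)]
    root = TreeNode(children=[TreeNode(children=leaves[:3]),
                              TreeNode(children=leaves[3:])])
    print(pvs(root, 2, -inf, inf, 1))   # minimax value of the toy tree: 2
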
class Negamax(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
            #if self.board.phase == constant.PLACEMENT_PHASE:
            self.tt.clear()

        MAX_ITER = 10

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        #else:
        # lets just set the default to the first move
        #    move = available_actions[0]

        # time allocated per move in ms
        '''
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter)
        else:
            self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter)
        '''

        self.time_alloc = 5000

        # get time
        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None
        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1
            except TimeOut:
                print("TIMEOUT")
                break

            if Negamax.curr_millisecond_time() - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}
        '''
        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str,colour,phase=self.board.phase)
        if key is not None:
            board_str = key[0]
            entry = self.tt.get_entry(board_str,colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move

            #print("FOUND ENTRY IN TT")
            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    #print("FOUND PV")
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        #print("FOUND FAIL SOFT")
                        alpha = tt_value

                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        #print("FOUND FAIL HARD")
                        beta = tt_value

                if alpha >= beta:
                    return tt_value, None
        '''
        # terminal test -- default case
        if self.cutoff_test(depth):
            # negamax convention: the evaluation is from self.player's perspective, so we
            # flip the sign when it is the opponent's turn to move
            val = self.evaluate_state(self.board, self.player) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None
        actions = self.board.update_actions(self.board, colour)
        '''
        if move_to_try is not None and move_to_try in actions:
            #print("MOVE ORDERING")
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions
        i = 0
        '''
        # get the favourable moves of the board
        actions = self.get_favourable_actions(self.available_actions)
        # if there are no favourable actions to iterate on - raise
        if len(actions) == 0:
            raise ReturnUnfavourableMove

        for action in actions:
            # skip over the best action in the tt table
            '''
            if action == move_to_try and i!= 0:
                continue
            i+=1
            '''
            self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            self.undo_move()

            if alpha >= beta:
                break
        '''
        # store the values in the transposition table
        if best_val <= original_alpha:
            # then this is an upperbound -FAILHARD
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            tt_type = constant.TT_EXACT
            # print("EXACT")
        '''
        # add the entry to the transposition table
        # self.tt.add_entry(self.board.board_state,colour,best_val,tt_type,best_action, depth)
        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES 
            -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended 
            -- Need to work out some optimisations of the algorithm though 

    '''

    def evaluate_state(self, board, colour):
        #return Evaluation.basic_policy(board,colour)
        return self.evaluation.evaluate(board, self.player)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_move(self):
        return self.board.undo_move()
class Negamax(object):
    def __init__(self, board, colour, file_name):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # timing attributes
        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0

        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")

    '''
    Iterative Deepening Negamax 
    
    This implements a time-cutoff such that search is terminated once we have reached the allocated time for evaluation.
    
    IT RETURNS THE BEST MOVE IT HAS FOUND IN THE TIME ALLOCATED 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        if self.board.phase == constant.PLACEMENT_PHASE:
            # clear the transposition table every time we want to evaluate a move in placement phase
            # this is to limit the size of growth
            self.tt.clear()

            # set the max depth iterations based on the phase that we are in
            MAX_ITER = 5
        else:
            MAX_ITER = 11

        # update the root number of pieces every time we do a search on a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(self.player)

        # if there are no available actions to make, we just return None -- this is a forfeit
        if len(available_actions) == 0:
            return None

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1500
        else:
            self.time_alloc = 1200

            # if we have used most of our total time or the game has gone past 120 moves, reduce the time per move
            if self.total_time > 90000 or self.board.move_counter > 120:
                self.time_alloc = 500
                # if we are near the final shrinking phase, then we can decrease the time it has to
                # evaluate
                if self.board.move_counter > 150:
                    self.time_alloc = 190

        best_depth = 1
        val, move = 0, None

        # set the time remaining for each move evaluation
        self.time_rem = self.time_alloc

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            # get the best move until cut off is reached
            try:

                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                # update the time remaining
                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)

                best_depth += 1
            except TimeOut:
                break

        # add the total time to the time allocated
        self.total_time += self.time_alloc

        # print(self.total_time)
        print(best_depth - 1)

        self.eval_depth = best_depth - 1
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    # get the current time in milliseconds
    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    '''
    NEGAMAX DRIVER FUNCTION -- THIS IMPLEMENTS THE FOLLOWING:
        - NEGAMAX WITH A TRANSPOSITION TABLE 
        - MOVE ORDERING USING THE BEST MOVE WE HAVE FOUND SO FAR (IF IT EXISTS IN THE TRANSPOSITION TABLE) 
        - MOVE ORDERING OF THE MOVES WE THINK TO BE FAVOURABLE USING A LIGHTWEIGHT EVALUATION FUNCTION 
        - SELECTING ONLY THE TOP FAVOURABLE MOVES TO EVALUATE USING MINIMAX -- THIS IS HEAVY GREEDY PRUNING 
          APPLIED TO NEGAMAX DESIGNED SUCH THAT WE ONLY LOOK AT MOVES THAT WE THINK WILL PRODUCE A GOOD OUTCOME,
          THUS PRUNING ANY MOVES THAT HAVE A HIGH CHANCE OF HAVING NO EFFECT ON THE GAME-STATE UTILITY.
    '''

    def negamax(self, depth, alpha, beta, colour):

        # print(self.board.board_state)

        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}

        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:

            # get the value mappings from the dictionary
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move

            if tt_depth >= depth:
                # this is the PV node therefore this is the best move that we have found so far
                if tt_type == constant.TT_EXACT:
                    return tt_value, tt_best_move

                # the minimax value in the transposition table is a lower bound to the search
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        alpha = tt_value

                # the value in the table corresponds to a beta cutoff and therefore it is an upper bound for beta
                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        beta = tt_value

                # test for cutoff -- return the best move found so far
                if alpha >= beta:
                    return tt_value, tt_best_move

        # obtain the actions and sort them
        actions = self.board.update_actions(colour)
        actions = self.board.sort_actions(actions, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the negamax search
        best_val = -inf
        best_action = None

        # if we have found a best action to take in the transposition table, this should be the first
        # move we should try -- put this at the start of the list of actions
        if move_to_try is not None and move_to_try in actions:
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions

        i = 0
        # split the list of actions into favourable and unfavourable actions
        # we only search the favourable actions when the action list is long enough
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]

        # iterate only through the favourable moves, keeping the number of moves small.
        # the aim is to reduce the branching factor as much as we can while still evaluating
        # enough moves that the decision keeps the optimality of negamax/minimax
        # rather than collapsing into a purely greedy approach.
        # print(len(favourable))
        for action in favourable:

            # skip over the best action from the tt table -- this action has already been searched
            if action == move_to_try and i != 0:
                continue
            i += 1

            # update the board, record the eliminated pieces from that update
            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score
            # undo the action applied to the board
            self.undo_action(action, colour, elim)

            # get the best score and action so far
            if score > best_val:
                best_val = score
                best_action = action

            # update alpha if needed
            if best_val > alpha:
                alpha = best_val

            # test for cut off
            if alpha >= beta:
                break

        # store the values in the transposition table
        if best_val <= original_alpha:
            # then this is an upper bound
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            # if the best value we have found is a lower bound
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            # this is the PV node value
            tt_type = constant.TT_EXACT

        # add the entry to the transposition table
        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action

    # cut-off test -- either depth is zero or the board is at terminal state
    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    # evaluate the game state
    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    # update the negamax board representation for another search
    def update_board(self, board):
        self.board = deepcopy(board)

    # terminal state check
    def is_terminal(self):
        return self.board.is_terminal()

    # undo board wrapper
    def undo_action(self, action, colour, elim):
        return self.board.undo_action(action, colour, elim)
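
Below is a self-contained sketch (not from the original source) of the time-limited iterative deepening that the itr_negamax drivers above perform: deepen one ply at a time, keep the best move from the last fully completed depth, and stop as soon as the per-move time budget runs out. The fake_search function and its timings are invented so the example can run on its own; in the real code the inner call is self.negamax(depth, -inf, inf, self.player).

from time import time


class TimeOut(Exception):
    pass


def fake_search(depth, deadline):
    # stand-in for a real negamax call: pretend deeper searches take
    # exponentially longer, and abort as soon as the deadline is passed
    finish = time() + 0.001 * (2 ** depth)
    while time() < finish:
        if time() > deadline:
            raise TimeOut
    return depth, ("move-found-at-depth", depth)


def iterative_deepening(time_alloc_ms=50, max_iter=11):
    deadline = time() + time_alloc_ms / 1000.0
    best_move = None
    for depth in range(1, max_iter):
        try:
            _, move = fake_search(depth, deadline)
            best_move = move        # keep only moves from fully completed depths
        except TimeOut:
            break
    return best_move


if __name__ == "__main__":
    print(iterative_deepening())
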
Example #6
    def evaluate_state(self, board):
        return Evaluation.basic_policy(board, self.player)
Example #7
class MinimaxABOptimised(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.transposition_table = set()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def iterative_deepening_alpha_beta(self):
        '''
        I don't think this is working correctly -- I believe results are being cached without
        taking into consideration the depth at which that minimax evaluation was made.
        We need to take the depth into account for the caching to be correct.

        Need to change this.
        '''
        MAX_ITER = 100

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        else:
            # lets just set the default to the first move
            move = available_actions[0]

        # time allocated per move in ms
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 500
        else:
            self.time_alloc = 1000

        # get time
        start_time = MinimaxABOptimised.curr_millisecond_time()
        best_depth = 1
        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(depth)
            # invalidate / clear the cache when increasing the search depth cutoff
            self.min_value.cache_clear()

            try:
                #self.max_value.cache_clear()
                # peform the search
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                move = self.alpha_beta_minimax(depth, available_actions)
                print(move)
                self.time_end = self.curr_millisecond_time()
                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                # after one iteration of ab search we can order the moves based on the actions that
                # the previous depth evaluated the actions at
                available_actions = []
                # transform the list into a max heap so the best-evaluated actions pop first
                heapq._heapify_max(self.actions_evaluated)
                while len(self.actions_evaluated) > 0:
                    (val, action) = heapq._heappop_max(self.actions_evaluated)
                    available_actions.append(action)

                # update the available_actions list
                available_actions = available_actions + self.actions_leftover

                best_depth += 1

            except TimeOut:
                print("TIMEOUT")
                break

            if MinimaxABOptimised.curr_millisecond_time(
            ) - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def alpha_beta_minimax(self, depth, available_actions):

        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        self.actions_evaluated = []
        if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
            self.min_value.cache_clear()
            # self.max_value.cache_clear()

        best_move = None
        alpha = -inf
        evaluate = -inf
        beta = inf

        # get the available moves of the board (based on the current board representation)
        # we can generate the actions as we wish -- this can easily change
        # TODO: optimisation / pruning of actions -- can be greedy -- split into favoured and unfavoured moves
        self.actions_leftover = self.board.update_actions(
            self.board, self.player)
        # self.actions_leftover = self.board.update_actions(self.board,self.player)

        for action in available_actions:
            # update the minimax board representation with the action
            self.board.update_board(action, self.player)

            # get the board representation for caching
            board_string = self.board.board_state.decode("utf-8")
            try:
                ab_evaluate = self.min_value(board_string, self.opponent,
                                             self.board.phase, depth - 1)
            except TimeOut:
                raise TimeOut

            heapq.heappush(self.actions_evaluated, (ab_evaluate, action))
            self.actions_leftover.remove(action)

            if ab_evaluate > evaluate:
                best_move = action
                evaluate = ab_evaluate

            # undo the move
            self.undo_effected = self.undo_move()

            if evaluate >= beta:
                self.minimax_val = evaluate
                return best_move

            alpha = max(alpha, evaluate)

        self.minimax_val = evaluate
        return best_move

    # memoize the function call -- optimisation
    #@lru_cache(maxsize=10000)
    def max_value(self, board_string, colour, phase, depth):

        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        evaluate = -inf

        if self.cutoff_test(depth):
            return self.evaluate_state(self.board)

        # visit each available move
        available_actions = self.board.update_actions(self.board, colour)

        for action in available_actions:

            # update the board representation with the move
            self.board.update_board(action, colour)

            # create an immutable object for board_string such that we can call lru_cache on the max function call
            board_string = self.board.board_state.decode("utf-8")

            # get the minimax value for this state
            try:
                evaluate = max(
                    evaluate,
                    self.min_value(board_string, self.opponent,
                                   self.board.phase, depth - 1))
            except TimeOut:
                raise TimeOut

            # undo the move so that we can apply another action
            self.undo_effected = self.undo_move()

            if evaluate >= self.beta:
                return evaluate

            self.alpha = max(evaluate, self.alpha)

        return evaluate

    # memoize the min value results -- optimisation of its function call
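    # note: since this is an instance method, lru_cache keys on the full argument tuple
    # (self, board_string, colour, phase, depth); iterative_deepening_alpha_beta clears
    # this cache each time the depth cutoff increases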
    @lru_cache(maxsize=100000)
    def min_value(self, board_string, colour, phase, depth):

        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        # beginning evaluation value
        evaluate = inf

        if self.cutoff_test(depth):
            return self.evaluate_state(self.board)

        # generate the actions to search on
        available_actions = self.board.update_actions(self.board, colour)

        for action in available_actions:

            # update the board representation -- this action is the min node's action
            self.board.update_board(action, colour)

            board_string = self.board.board_state.decode("utf-8")

            # find the value of the max node
            try:
                evaluate = min(
                    evaluate,
                    self.max_value(board_string, self.player, self.board.phase,
                                   depth - 1))
            except TimeOut:
                raise TimeOut
            # undo the board move so that we can apply another move
            # -- this takes us back up a level in the search tree
            self.undo_effected = self.undo_move()
            '''
            if beta <= alpha:
                # when we break from the loop make sure to undo the move
                break
            '''

            if evaluate <= self.alpha:
                return evaluate

            self.beta = min(self.beta, evaluate)

        return evaluate

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * NEED TO THINK ABOUT IF THIS FUNCTION JUST EVALUATES THE NODES AT THE ROOT STATE DUE TO THE UNDO MOVES 
            -- NEED TO TEST THIS OUT SOMEHOW, because other than that the algorithm is working as intended 
            -- Need to work out some optimisations of the algorithm though 

    '''

    def evaluate_state(self, board):
        #return Evaluation.basic_policy(board, self.player)
        return self.evaluation.evaluate(board, self.player)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def check_symmetry(self, board_state):
        transformation = MinimaxABOptimised.apply_horizontal_reflection(board_state)
        board = deepcopy(board_state)
        if transformation.decode("utf-8") in self.visited:
            return True
        else:
            self.visited.add(board.decode("utf-8"))
            return False

    @staticmethod
    def apply_horizontal_reflection(board_state):
        temp = ''
        for index in range(constant.BOARD_SIZE**2):
            temp += constant.FREE_SPACE

        temp = bytearray(temp, 'utf-8')

        for row in range(constant.BOARD_SIZE):
            for col in range(constant.BOARD_SIZE):
                Board.set_array_char(
                    temp, 7 - row, 7 - col,
                    Board.get_array_element(board_state, row, col))
        # print(temp)
        # print(board_state)
        return temp

    def undo_move(self):
        return self.board.undo_move()
        # then we need to recalculate the available moves based on the board representation
        # self.generate_actions()

    '''
    #################################################################################
    # METHODS FOR THE DICTIONARY REPRESENTATION OF THE AVAILABLE MOVES ON THE BOARD #
    #
    #
    #
    #
    ################################################################################
    '''
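    # shape of self.available_actions, per colour:
    #   - placement phase: {position: constant.PLACEMENT_PHASE} for every free square
    #     inside that colour's starting area
    #   - moving phase: {position: [move_type, ...]} listing the legal move types of
    #     the piece currently at that position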

    # we update the available actions when we update the board representation
    def generate_actions(self):
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.init_available_placement_actions()
            self.start_available_actions_placement()
        elif self.board.phase == constant.MOVING_PHASE:

            #print(self.board.piece_pos)
            #print("dsfsf")
            self.init_available_moving_actions()

    def init_available_placement_actions(self):
        # initialise the dictionary with the available placements on the board
        for row in range(constant.BOARD_SIZE):
            for col in range(constant.BOARD_SIZE):
                piece = col, row
                # print(col,row)
                for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
                    if Board.within_starting_area(piece, colour):
                        temp = {piece: constant.PLACEMENT_PHASE}
                        # print(temp)
                        self.available_actions[colour].update(temp)

    def start_available_actions_placement(self):
        # get rid of all pieces that exist on the board
        for colour in (constant.BLACK_PIECE, constant.WHITE_PIECE):
            for piece in self.board.piece_pos[colour]:
                if piece in self.available_actions[constant.WHITE_PIECE]:
                    if Board.within_starting_area(piece, constant.WHITE_PIECE):
                        self.available_actions[constant.WHITE_PIECE].pop(piece)
                    if Board.within_starting_area(piece, constant.BLACK_PIECE):
                        self.available_actions[constant.BLACK_PIECE].pop(piece)

    def init_available_moving_actions(self):
        # clear the dictionary
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }
        for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
            for piece in self.board.piece_pos[colour]:
                #print(piece)
                self.update_actions_dict_entry(piece, colour)

    # need to ensure that we call this after an update to the minimax board representation
    def update_available_moves(self, action, colour):

        # if there were any eliminated pieces last move retrieve them from the stack -- but make sure not to pop them
        # off the stack completely
        eliminated_pieces = self.board.eliminated_pieces_last_move(
            self.board.phase, self.board.move_counter, pop=False)

        # action is in the form (position, movetype)
        #       -- i.e. we are moving the piece at position by the movetype
        #       -- when an action is applied we have already moved that piece, and we need to change
        #       -- the entries in the dictionary according to that move
        # colour is the colour of the piece we have moved
        # read in the pieces on the board -- if they already exist in the dictionary
        # then we don't need to do anything -- if they don't exist in the dictionary
        # need to look at all the eliminated pieces on the board
        #   -- look for pieces in the vicinity of that space
        #   -- delete keys associated with those eliminated pieces, as these are pieces on the board
        #   -- that do not exist anymore, therefore there are no moves associated with them
        #   -- update the available moves of the pieces that can move into that square
        # need to update the available moves of the piece at its new location
        # delete the entry in the dictionary that corresponds to the old position
        old_pos = action[0]
        #print(old_pos)
        #print(action)
        new_pos = Board.convert_move_type_to_coord(old_pos, action[1])

        # first we need to update the dictionary by removing the old piece from the
        # dictionary -- as this is not an available move anymore
        if old_pos in self.available_actions[colour]:
            #print("old")
            self.available_actions[colour].pop(old_pos)
        else:
            pass
            # need to raise an error saying

        # then add an entry into the dictionary corresponding to the new location of the piece
        # after the move has been applied
        if new_pos not in self.available_actions[colour]:
            self.update_actions_dict_entry(new_pos, colour)
        else:
            pass
            # need to raise an error

        # remove all eliminated pieces from the dictionary
        for piece_type in (constant.WHITE_PIECE, constant.BLACK_PIECE):
            for piece in eliminated_pieces[piece_type]:
                if piece in self.available_actions[piece_type]:
                    self.available_actions[piece_type].pop(piece)
                else:
                    pass
                    # need to raise an error

        # update any piece that is surrounding the old position but also any eliminated pieces and update
        # their available moves by adding the corresponding move type to that list
        # this old position is now a free space on the board and therefore pieces are able to now move into it
        # need to test all positions surround this newly freed space and update their available actions
        for move_type in range(constant.MAX_MOVETYPE):
            # iterate through all the possible moves at the old location, checking
            # whether or not there is a piece there
            # if there is a piece at that location we can update that piece's available moves
            piece = Board.convert_move_type_to_coord(old_pos, move_type)
            for piece_colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
                if piece in self.available_actions[piece_colour]:
                    if move_type < 4:
                        self.update_actions_dict_entry(piece, piece_colour)
                    else:
                        if self.board.can_jump_into_position(
                                old_pos, move_type):
                            self.update_actions_dict_entry(piece, piece_colour)

            # update the pieces around any eliminated pieces
            for piece_colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
                # iterate through all the eliminated pieces on the board
                for elim_piece in eliminated_pieces[piece_colour]:
                    # for each eliminated piece we apply a move (move_type to it), checking if there is a piece
                    # at this position on the board, we do this by checking the available moves dictionary
                    # if there is a piece associated with that position on the board then if it is a one step move
                    # we just need to update that pieces available moves, if it is a jump, then we need to test if there
                    # is an adjacent piece between the jump and the free space -- do this by calling
                    # can_jump_into_position -- for a given space, if we apply a move_type corresponding to a
                    # two piece move, can we jump into this free spot
                    # if we can then we just need to update this pieces available actions

                    piece = Board.convert_move_type_to_coord(
                        elim_piece, move_type)
                    '''
                    # if this piece corresponds to an entry in the dictionary, then there is a piece at this location
                    if piece in self.available_actions[piece_colour]:
                        # one step moves
                        if move_type < 4:
                            self.update_actions_dict_entry(piece,piece_colour)
                        else:
                            # need to check if a jump is available into the free space
                            # if the piece at the jump location is in the available_action dict
                            if self.board.can_jump_into_position(elim_piece,move_type):
                                self.update_actions_dict_entry(piece,piece_colour)
                    '''
                    self.update_surrounding_pieces(piece)
            # update the available moves of the pieces that surround where the
            # new position of the piece is -- this is no longer an occupied space therefore pieces surrounding
            # it cannot move into this space anymore
            piece = Board.convert_move_type_to_coord(new_pos, move_type)
            for piece_colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
                if piece in self.available_actions[piece_colour]:
                    '''
                    if move_type < 4:
                        self.update_actions_dict_entry(piece,piece_colour)
                    else:
                        # treat this old position as a free space -- if there are pieces
                        # that can jump into this piece we have to update these pieces available
                        # actions because this space is no longer free
                        if self.board.can_jump_into_position(new_pos,move_type):
                            self.update_actions_dict_entry(piece,piece_colour)
                    '''
                    self.update_surrounding_pieces(piece)

    # HELPER METHOD THAT ALLOWS US TO UPDATE A PARTICULAR PIECE'S AVAILABLE ACTIONS IN THE DICTIONARY
    def update_actions_dict_entry(self, piece, colour):
        temp_list = self.get_piece_legal_moves(piece)
        update_entry = {piece: temp_list}
        self.available_actions[colour].update(update_entry)

    # get a list of the legal moves of a particular piece
    def get_piece_legal_moves(self, piece):
        available_moves = []
        for move_type in range(constant.MAX_MOVETYPE):
            if self.board.is_legal_move(piece, move_type):
                available_moves.append(move_type)
        #print(available_moves)
        return available_moves

    def update_available_placement(self, action):
        # to update the available actions in the placement phase we just need to read in the action made
        # remove this entry from the dictionary
        # add the entries of any eliminated positions in the dictionary

        elim = []
        eliminated_pieces = self.board.eliminated_pieces_last_move(
            self.board.phase, self.board.move_counter, pop=False)
        #print("ELIMINATED: ",end='')
        #print(eliminated_pieces)
        #print("AVAILABLE: ",end='')
        #print(self.available_actions)
        for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
            if Board.within_starting_area(action, colour):
                # remove the action from the entry of the dictionary
                if action in self.available_actions[colour]:
                    self.available_actions[colour].pop(action)
            # add all the eliminated pieces to the available moves of the dictionary
            for piece in eliminated_pieces[colour]:
                elim.append(piece)

        for colour in (constant.WHITE_PIECE, constant.BLACK_PIECE):
            for piece in elim:
                if Board.within_starting_area(piece, colour):
                    update_entry = {piece: constant.PLACEMENT_PHASE}
                    self.available_actions[colour].update(update_entry)

    def update_available_actions(self, action, colour):
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.update_available_placement(action)
        elif self.board.phase == constant.MOVING_PHASE:
            if self.board.move_counter == 0:
                self.update_available_placement(action)
            else:
                self.update_available_moves(action, colour)

    # return a list of actions corresponding to a particular board state
    def get_actions(self, colour):
        actions = []
        if self.board.phase == constant.PLACEMENT_PHASE:
            for key in self.available_actions[colour].keys():
                # return a list containing the free spaces on the board that a player can place a piece into
                actions.append(key)

            return actions
        elif self.board.phase == constant.MOVING_PHASE:
            if self.board.move_counter == 0:
                self.init_available_moving_actions()

            for key in self.available_actions[colour].keys():
                for move_type in self.available_actions[colour][key]:
                    # return a list of the piece_position and the move it can make
                    actions.append((key, move_type))
            return actions
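
    # A minimal sketch (not from the original source) of the two shapes that the available_actions
    # dictionary takes, and what get_actions returns for each. The literal phase markers, move types
    # and coordinates below are illustrative stand-ins, not the project's constant values.
    '''
    PLACEMENT_PHASE, MOVING_PHASE = 0, 1

    # placement phase: key = free square in the starting area, value = phase marker
    placement_actions = {(3, 3): PLACEMENT_PHASE, (4, 2): PLACEMENT_PHASE}
    placement_result = list(placement_actions.keys())            # [(3, 3), (4, 2)]

    # moving phase: key = occupied square, value = list of legal move types for that piece
    moving_actions = {(3, 3): [0, 2], (5, 5): [1]}
    moving_result = [(pos, mt) for pos, mts in moving_actions.items() for mt in mts]
    # [((3, 3), 0), ((3, 3), 2), ((5, 5), 1)]
    '''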

    '''
    This method is only called after an undo_move call -- undo_move sets the pieces_effected_undo
    attribute to a list.

    That list contains every piece affected by the undo, so when restoring the available actions
    after the undo_move call we only need to update the entries the undo actually touched.
    (A compact sketch of this dispatch follows this note.)

    Pieces affected by an undo move are:
        - any eliminated piece -- the undo puts it back on the board, so its entry in the available actions
          needs to be restored (see constant.ELIMINATED_PIECE below)
        - Each entry of the list is of the form (location, colour, undo_type)
            - undo_type tells us what kind of location was affected by the undo -- i.e. what that square of the
              board held before the undo

                - constant.PLACE_LOC -- a piece was placed here; calling undo_move removed it from the board, so
                  to re-establish the old available moves we just need to add this position (if valid) back into
                  the dictionary of free squares

                - constant.ELIMINATED_PIECE -- a piece had previously been eliminated at this location, so after
                  the undo that square is occupied again. We re-add its entry and update the pieces that surround
                  it (in the moving phase), or remove this position from the dictionary (in the placement phase)

                - constant.PIECE_OLD_LOC -- moving phase only: a piece was moved from this position to a new one,
                  so its entry is missing from the current dictionary. We need to add it back and update any
                  pieces that surround it

                - constant.PIECE_NEW_LOC -- moving phase only: a piece was moved from an old location to this one,
                  so this position should not exist in the restored dictionary. We need to remove it and update
                  any surrounding pieces

    Edge cases --
        - shrinking corners: this should already be handled by the undo_move function
            - all pieces eliminated by a shrink should be in the affected list
        - PLACEMENT -> MOVING transition
            - treat the affected pieces as placement-phase pieces -- it might be worth just re-evaluating the
              board completely here
            - when undoing a change from the moving phase back to the placement phase, undo_move already resets
              the phase and move counter, so this should not be an issue
    '''
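
    # A compact sketch (not from the original source) of the dispatch described in the note above:
    # each (location, colour, undo_type) tuple recorded by undo_move maps to one restore operation on
    # the available_actions dictionary. The constants and the dict values here are illustrative
    # stand-ins; the real handlers are undo_available_placement / undo_available_moves below.
    '''
    PLACE_LOC, ELIMINATED_PIECE, PIECE_OLD_LOC, PIECE_NEW_LOC = range(4)

    def restore_moving_entry(available_actions, loc, colour, undo_type):
        # moving-phase restore only
        if undo_type in (PIECE_OLD_LOC, ELIMINATED_PIECE):
            available_actions[colour][loc] = []          # piece occupies loc again -> re-add its entry
        elif undo_type == PIECE_NEW_LOC:
            available_actions[colour].pop(loc, None)     # loc is empty again -> drop its entry
    '''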

    def undo_available_placement(self):
        # we just need to pop each piece from the undo_moves effected pieces
        while len(self.undo_effected) > 0:
            action = self.undo_effected.pop()
            #print("POP")
            #print(action)
            loc = action[0]
            #print(loc)
            colour = action[1]
            undo_type = action[2]
            opponent = Board.get_opp_piece_type(colour)

            if undo_type == constant.ELIMINATED_PIECE:
                # this piece was eliminated before the undo move, now we have placed it back on the board with undo
                if loc in self.available_actions[colour]:
                    # remove the action from the dictionary of the corresponding colour
                    self.available_actions[colour].pop(loc)
                if loc in self.available_actions[opponent]:
                    self.available_actions[opponent].pop(loc)

            elif undo_type == constant.PLACE_LOC:
                # a piece was placed at this location prior to calling undo move
                # therefore to re-establish the original available moves list we need to add
                # this position to the corresponding dict
                if loc not in self.available_actions[colour] and loc not in\
                        self.available_actions[opponent]:
                    # if we can place a piece at this location again -- then this piece corresponds to a free space
                    if self.board.within_starting_area(loc, colour):
                        temp = {loc: constant.PLACEMENT_PHASE}
                        self.available_actions[colour].update(temp)

                    if self.board.within_starting_area(loc, opponent):
                        temp = {loc: constant.PLACEMENT_PHASE}
                        self.available_actions[opponent].update(temp)

    def undo_available_moves(self):

        for tup in self.undo_effected:
            #print(tup)
            loc = tup[0]
            colour = tup[1]
            undo_type = tup[2]

            # get rid of the relevant entries in the dictionary
            if undo_type == constant.PIECE_OLD_LOC:
                # if it is an old location it currently does not exist in the dictionary since it was deleted when
                # it was updated
                # add it back
                self.update_actions_dict_entry(loc, colour)
                self.update_surrounding_pieces(loc)
            elif undo_type == constant.PIECE_NEW_LOC:
                #print(loc)
                # if it is a new location it currently exists in the dictionary, and we must remove it
                if loc in self.available_actions[colour]:
                    self.available_actions[colour].pop(loc)
                    self.update_surrounding_pieces(loc)
            elif undo_type == constant.ELIMINATED_PIECE:
                # if there were eliminated pieces that were put back onto the board in the undo move -- then these
                # pieces would not exist in the current available move dictionary
                self.update_actions_dict_entry(loc, colour)
                self.update_surrounding_pieces(loc)

        # clear the undo-effected list
        self.undo_effected = []

    # given a center_position -- update the pieces that surround that centre position if they exist
    def update_surrounding_pieces(self, center_pos):
        for move_type in range(constant.MAX_MOVETYPE):
            potential_piece = Board.convert_move_type_to_coord(
                center_pos, move_type)

            # check if the potential piece is a piece
            if potential_piece in self.available_actions[constant.WHITE_PIECE]:
                # then it is a piece on the board
                # update this piece
                self.update_actions_dict_entry(potential_piece,
                                               constant.WHITE_PIECE)
            elif potential_piece in self.available_actions[
                    constant.BLACK_PIECE]:
                self.update_actions_dict_entry(potential_piece,
                                               constant.BLACK_PIECE)

    def restore_available_actions(self):
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.undo_available_placement()
        elif self.board.phase == constant.MOVING_PHASE:
            self.undo_available_moves()

    def alpha_beta(self, depth):
        self.generate_actions()
        if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
            # the memoised search method used below is min_v, so clear its cache here
            self.min_v.cache_clear()
            # self.max_value.cache_clear()

        best_move = None
        alpha = -inf
        evaluate = -inf
        beta = inf

        # get the available moves of the board (based on the current board representation)
        # we can generate the actions as we wish -- this can easily change
        # TODO: optimisation / pruning of actions -- can be greedy, splitting favoured and unfavoured moves

        # self.actions_leftover = self.board.update_actions(self.board,self.player)
        available_actions = self.get_actions(self.player)
        for action in available_actions:
            # update the minimax board representation with the action
            self.board.update_board(action, self.player)
            self.update_available_actions(action, self.player)

            # get the board representation for caching
            board_string = self.board.board_state.decode("utf-8")
            ab_evaluate = self.min_v(board_string, self.opponent,
                                     self.board.phase, depth - 1)
            if ab_evaluate > evaluate:
                best_move = action
                evaluate = ab_evaluate
            # undo the move
            self.undo_effected = self.undo_move()
            self.restore_available_actions()

            if evaluate >= beta:
                self.minimax_val = evaluate
                return best_move

            alpha = max(alpha, evaluate)

        self.minimax_val = evaluate
        return best_move
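
    # A self-contained toy illustration (assumed, not project code) of the root-driver pattern used by
    # alpha_beta above: score every root action, keep the best one, and raise alpha as better moves are
    # found. The dict-based "game tree" is purely illustrative; in the real search each value comes from
    # applying the action, calling min_v, and undoing the move.
    '''
    from math import inf

    root_children = {"a": 3, "b": 7, "c": 5}            # action -> value returned by the min layer

    def root_driver():
        alpha, beta = -inf, inf
        best_val, best_move = -inf, None
        for action, value in root_children.items():
            if value > best_val:
                best_val, best_move = value, action
            if best_val >= beta:                         # beta cut-off (cannot fire while beta is inf)
                break
            alpha = max(alpha, best_val)                 # alpha would be passed down to the min layer
        return best_move                                 # 'b'
    '''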

    # memoize the function call -- optimisation
    #@lru_cache(maxsize=10000)
    def max_v(self, board_string, colour, phase, depth):

        evaluate = -inf

        if self.cutoff_test(depth):
            return self.evaluate_state(self.board)

        # visit each available move
        available_actions = self.get_actions(colour)
        for action in available_actions:
            #print(action)
            #print(self.board.move_counter, self.board.phase)
            # update the board representation with the move
            self.board.update_board(action, colour)
            self.update_available_actions(action, colour)
            # create an immutable object for board_string such that we can call lru_cache on the max function call
            board_string = self.board.board_state.decode("utf-8")

            # get the minimax value for this state
            evaluate = max(
                evaluate,
                self.min_v(board_string, self.opponent, self.board.phase,
                           depth - 1))

            # undo the move so that we can apply another action
            self.undo_effected = self.undo_move()
            self.restore_available_actions()

            if evaluate >= self.beta:
                return evaluate

            self.alpha = max(evaluate, self.alpha)

        return evaluate

    # memoize the min value results -- optimisation of its function call
    @lru_cache(maxsize=1000)
    def min_v(self, board_string, colour, phase, depth):

        # beginning evaluation value
        evaluate = inf

        if self.cutoff_test(depth):
            return self.evaluate_state(self.board)

        # generate the actions to search on
        available_actions = self.get_actions(colour)
        for action in available_actions:

            # update the board representation -- this action is the min node's action
            self.board.update_board(action, colour)
            self.update_available_actions(action, colour)
            board_string = self.board.board_state.decode("utf-8")

            # find the value of the max node
            evaluate = min(
                evaluate,
                self.max_v(board_string, self.player, self.board.phase,
                           depth - 1))

            # undo the board move so that we can apply another move
            self.undo_effected = self.undo_move()
            self.restore_available_actions()

            if evaluate <= self.alpha:
                return evaluate

            self.beta = min(self.beta, evaluate)

        return evaluate
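
    # A minimal, self-contained sketch (assumed, not project code) of the memoisation idea used by
    # min_v above: the mutable board is serialised to an immutable string so lru_cache can use it as
    # part of the cache key and repeated positions are not re-searched. Note that in the class above
    # self is also part of the key, while the mutable self.alpha / self.beta are not, so the cache acts
    # as a heuristic speed-up rather than an exact transposition table.
    '''
    from functools import lru_cache

    @lru_cache(maxsize=1000)
    def evaluate_position(board_string):
        # board_string is an immutable str, so it is hashable and usable as an lru_cache key
        return board_string.count('W') - board_string.count('B')

    print(evaluate_position("W-WB-"))        # computed: 1
    print(evaluate_position("W-WB-"))        # served from the cache
    print(evaluate_position.cache_info())
    '''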
Example #8
0
class Negamax(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        #if self.board.phase == constant.PLACEMENT_PHASE:
        self.tt.clear()

        MAX_ITER = 10

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        print(len(available_actions))
        action_set = set(available_actions)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        #else:
        # lets just set the default to the first move
        #    move = available_actions[0]

        # time allocated per move in ms
        '''
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter)
        else:
            self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter)
        '''

        # self.time_alloc = 5000
        # time allocated per move in ms
        self.time_alloc = 0
        total = 120000
        if self.board.phase == constant.PLACEMENT_PHASE:
            #self.time_alloc = (total/2 - self.time_alloc) / (24 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 1000
        else:
            #self.time_alloc = (total - self.time_alloc) / (100 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 1000
        # get time
        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None
        # iterative deepening begins here
        best_move = None
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1

                # if we have a move that is not None, always pick the latest legal move --
                # because we are doing a greedy search it sometimes returns an illegal move (cause unclear),
                # so here we also check that the move is legal
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                print("TIMEOUT")
                break

            if Negamax.curr_millisecond_time() - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return best_move
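
    # A self-contained toy sketch (assumed, not project code) of the time-budgeted iterative deepening
    # loop above: run progressively deeper searches, keep the best verified-legal move from the deepest
    # iteration that finished, and stop once the per-move budget is spent. (The commented-out code above
    # instead derives the budget by dividing the remaining game clock by the expected remaining moves.)
    '''
    from time import time

    def iterative_deepening(search_fn, legal_moves, budget_ms=1000, max_depth=10):
        start = time() * 1000
        best_move = None
        for depth in range(1, max_depth):
            move = search_fn(depth)                      # may raise TimeOut in the real code
            if move is not None and move in legal_moves:
                best_move = move                         # only ever keep verified-legal moves
            if time() * 1000 - start > budget_ms:
                break
        return best_move
    '''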

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive Negamax (depth limited)  -- No Transposition Table
    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        dic = {self.player: 1, self.opponent: -1}

        # generate legal actions
        actions_1 = self.board.update_actions(self.board, colour)
        # print(len(actions))
        actions = self.board.sort_actions(actions_1, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None

        # generate legal actions
        #actions = self.board.update_actions(self.board, colour)
        # split the actions into favourable and unfavourable
        # if there are more actions than some threshold, we only look through the first few
        # 'favourable' actions (the first 8 here)
        # if there are fewer actions than the threshold, we evaluate every action we have
        # THIS IS A GREEDY APPROACH TO MINIMAX THAT LIMITS OUR BRANCHING FACTOR OF THE GAME
        if len(actions) > 8:
            favourable = actions[:8]
        else:
            favourable = actions
        # got here
        #print("got here")
        # depth reduction
        R = 2
        #print(favourable)
        #self.board.print_board()
        for action in favourable:

            self.board.update_board(action, colour)
            # note: because the loop only iterates over the favourable slice, this condition is always
            # true and the depth-reduced (R) branch below is never taken as written
            if action in favourable:
                score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            else:
                score, temp = self.negamax(depth - 1 - R, -beta, -alpha,
                                           opponent)

            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            self.undo_move()

            if alpha >= beta:
                break

        return best_val, best_action
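
    # A hedged sketch (an assumption, not the author's code) of how the depth reduction R above would
    # typically be applied if the loop iterated over every sorted action rather than only the favourable
    # slice: the first few (favourable) moves get a full-depth search, the later ones a reduced one.
    # Board make/undo is omitted for brevity; negamax is a callback taking (depth, alpha, beta).
    '''
    def search_children(actions, depth, alpha, beta, negamax, R=2, keep=8):
        from math import inf
        best_val, best_action = -inf, None
        for i, action in enumerate(actions):             # actions assumed sorted best-first
            reduction = 0 if i < keep else R             # late, unfavourable moves searched shallower
            score, _ = negamax(depth - 1 - reduction, -beta, -alpha)
            score = -score
            if score > best_val:
                best_val, best_action = score, action
            alpha = max(alpha, score)
            if alpha >= beta:                            # cut-off
                break
        return best_val, best_action
    '''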

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * Need to check whether this function ends up evaluating only nodes at the root state because of the undo moves
            -- this needs to be tested somehow, because other than that the algorithm is working as intended
            -- some optimisations of the algorithm are still needed
    '''

    def evaluate_state(self, board, colour, actions):
        #return Evaluation.basic_policy(board,colour)
        return self.evaluation.evaluate(board, colour, actions)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_move(self):
        return self.board.undo_move()
Example #9
0
class Negamax(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        #if self.board.phase == constant.PLACEMENT_PHASE:
        self.tt.clear()

        # update the root number of pieces every time we do a search on a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(self.player)

        action_set = set(available_actions)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        #else:
        # lets just set the default to the first move
        #    move = available_actions[0]

        # time allocated per move in ms
        '''
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter)
        else:
            self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter)
        '''

        # self.time_alloc = 5000
        # time allocated per move in ms
        self.time_alloc = 0
        total = 120000
        if self.board.phase == constant.PLACEMENT_PHASE:
            #self.time_alloc = (total/2 - self.time_alloc) / (24 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 3000
        else:
            #self.time_alloc = (total - self.time_alloc) / (100 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 800

        # get time
        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None

        # MAX_ITER is not defined anywhere in this snippet; a cap of 10 is assumed here (mirroring the
        # previous example) so that the iterative deepening loop below can run
        MAX_ITER = 10

        # iterative deepening begins here
        best_move = None

        for depth in range(1, MAX_ITER):

            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1

                # if we have a move that is not None, always pick the latest legal move --
                # because we are doing a greedy search it sometimes returns an illegal move (cause unclear),
                # so here we also check that the move is legal
                if move is not None and move in action_set:
                    best_move = move
                # print(self.board)
                # print("sdfsfsfsfsfsdfsfsdfs")
            except TimeOut:
                break

        self.eval_depth = best_depth

        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive Negamax (depth limited)  -- No Transposition Table
    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)

        # for evaluation
        dic = {self.player: 1, self.opponent: -1}

        # generate legal actions
        actions = self.board.update_actions(colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None
        #print(self.board)
        #print(actions)
        #print(self.board.white_pieces)
        # print(self.board.black_pieces)
        # generate legal actions
        # actions = self.board.update_actions(colour)
        # print("THESE ACTIONS----------------")
        # print(actions)
        # print(self.board)
        # print("*"*30)
        for action in actions:
            # print("THIS CALL--------")
            # print(self.board)
            # print("THIS CALL--------")
            # if self.board.phase == constant.MOVING_PHASE:
            #     piece = self.board.get_piece(action[0])
            #     direction = action[1]
            #     if piece.is_legal_move(direction) is False:
            #         print(actions)
            #         print(self)
            #         print("WHYYYYYYYYYYYYYY--------------------------------------------")
            #         print(action[0], direction, colour)
            #         print(piece)
            #         print(piece.get_legal_actions())

            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            self.undo_action(action, colour, elim)

            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            if alpha >= beta:
                break

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    def evaluate_state(self, board, colour, actions):
        # return len(self.board.white_pieces) - len(self.board.black_pieces)
        return self.evaluation.evaluate(board, colour, actions)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_action(self, action, colour, elim_pieces):

        self.board.undo_action(action, colour, elim_pieces)
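
    # A minimal sketch (assumed, not project code) of the make/undo pattern used by negamax in this
    # example: update_board returns the pieces it eliminated, and passing them back to undo_action lets
    # the undo restore the position exactly without a separate undo stack. The toy set-based board and
    # function names below are hypothetical stand-ins.
    '''
    def apply_action(pieces, action, captures):
        # place the piece and remove any captured squares, returning them so the caller can undo
        pieces.add(action)
        eliminated = captures & pieces
        pieces -= eliminated
        return eliminated

    def undo_action(pieces, action, eliminated):
        # exact inverse of apply_action, driven only by what apply_action returned
        pieces.discard(action)
        pieces |= eliminated

    pieces = {(0, 0), (1, 1)}
    elim = apply_action(pieces, (2, 2), captures={(1, 1)})
    undo_action(pieces, (2, 2), elim)
    assert pieces == {(0, 0), (1, 1)}
    '''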