Example #1
0
def game_step():
    """Handle one move request (human or AI) and advance the game.

    This endpoint is called every time a player/AI wants to play a move
    on the board; the position passed is in [0, 63] where 0 is the
    bottom right corner.

    Returns
    -------
    json : json
        black_board, white_board, legal_moves, player, done
    """
    global board_state, board_legal_moves
    chosen_position = int(request.form['position'])
    if chosen_position == -1:
        # a position of -1 signals that the AI should choose the move
        action = ai_player_move(board_state, board_legal_moves)
    else:
        # human player: convert the position index into a one-hot bitboard
        action = 1 << chosen_position
    board_state, board_legal_moves, player, done = env.step(board_state,
                                                            action)
    # send the updated board representation back to the client
    return jsonify(
        black_board=get_set_bits_list(board_state[0]),
        white_board=get_set_bits_list(board_state[1]),
        legal_moves=get_set_bits_list(board_legal_moves),
        player=player,
        done=done,
    )
Example #2
0
    def __init__(self, s, legal_moves, m=-1, terminal=0, parent=None):
        """Create one node of the MCTS tree (the tree is a node collection).

        Parameters
        ----------
        s : tuple
            the board state represented as bitboards
        legal_moves : 64 bit int
            bits corresponding to legal positions are set to 1 in this int
        m : int (optional)
            the move played to reach this state, given as a position from
            the right end of the array (not a 64 bit bitboard)
        terminal : int (optional)
            flag denoting whether this is a leaf node
        parent : int (optional)
            index of this node's parent in the mcts node list
        """
        self.state = s
        self.move = m
        self.terminal = terminal
        self.parent = parent
        self.legal_moves = legal_moves
        # expand the 64 bit mask into a list of set positions once so that
        # later lookups are cheap; shuffle it so moves come out in random
        # order
        self.legal_moves_set = get_set_bits_list(legal_moves)
        np.random.shuffle(self.legal_moves_set)
        # used to check whether every child has already been expanded
        self.total_legal_moves = get_total_set_bits(legal_moves)
        # win count, visit count and (cached) parent visit count
        self.w = 0
        self.n = 0
        self.N = 0
        self.children = []
        # because legal_moves_set is shuffled, total_children doubles as the
        # index of the next unexplored move to pick
        self.total_children = 0
Example #3
0
    def move(self, board, legal_moves, value=None):
        """Get the action with maximum Q value (epsilon-greedy).

        Parameters
        ----------
        board : Numpy array
            The board state on which to calculate best action
        legal_moves : int (64 bit)
            bits corresponding to legal positions are set to 1
        value : None, optional
            Kept for consistency with other agent classes

        Returns
        -------
        output : int (64 bit)
            bitboard with the single selected position set, or 0 when no
            legal move is available (a pass)
        """
        # no legal move available: return an empty bitboard, matching the
        # other agent classes; the original assigned a = 0 here but never
        # returned it, so get_random_move_from_list was called on an empty
        # list and crashed
        if not legal_moves:
            return 0
        # exploit with probability (1 - epsilon), otherwise explore
        if np.random.random() > self.epsilon:
            # use the agent model to make the predictions
            model_outputs = self._get_model_outputs(board, self._model)[0]
            # convert the bitboard into a flat 0/1 ndarray aligned with the
            # model's output positions
            legal_moves = self._converter.convert(legal_moves,
                      input_format='bitboard_single', output_format='ndarray')\
                        .reshape((1,-1))[0]
            # mask out illegal positions before taking the argmax;
            # 63 - argmax maps the array index back to the bit position
            return 1 << int((63 - np.argmax(
                np.where(legal_moves == 1, model_outputs, -np.inf))))
        else:
            # explore: pick uniformly among the legal positions
            return 1 << get_random_move_from_list(
                get_set_bits_list(legal_moves))
Example #4
0
def coin_choice():
    """Reset the environment after the player picks a coin colour.

    This endpoint is called after the player has chosen which coin to
    play with; the environment is reset here and the board data is
    returned in json format.

    Returns
    -------
    json : json
        black_board, white_board, legal_moves, player,
        done (if game ended or not), ai_player_coin (0/1), score_display_html
    """
    global board_state, board_legal_moves, ai_player_coin, env
    # colour selected by the player in the ajax call; the AI plays the
    # opposite coin (0 = black for AI when player takes white, 1 = white)
    c = request.form['color']
    if c == 'white':
        ai_player_coin = 0
    elif c == 'black':
        ai_player_coin = 1
    else:
        # c == 'random': flip a coin for the AI colour
        ai_player_coin = 0 if random() < 0.5 else 1
    # start a fresh game
    done = 0
    board_state, board_legal_moves, player = env.reset()
    # the side panel html is the score display followed by a reset button
    with open('templates/score_display.html', 'r') as f:
        score_display_html = f.read()
    with open('templates/reset.html', 'r') as f:
        score_display_html += f.read()
    # swap the colour labels when the AI plays white
    if ai_player_coin == 1:
        score_display_html = (score_display_html
                              .replace('AI (Black)', 'AI (White)')
                              .replace('You (White)', 'You (Black)'))
    # return the boards and other data, html
    return jsonify(
        black_board=get_set_bits_list(board_state[0]),
        white_board=get_set_bits_list(board_state[1]),
        legal_moves=get_set_bits_list(board_legal_moves),
        player=player,
        done=done,
        ai_player_coin=ai_player_coin,
        score_display_html=score_display_html,
    )
Example #5
0
    def move(self, s, legal_moves):
        """Pick a uniformly random move from the legal moves bitboard.

        Parameters
        ----------
        s : tuple
            contains black and white bitboards and current player
        legal_moves : int (64 bit)
            legal states are set to 1

        Returns
        -------
        a : int (64 bit)
            bitboard representing position to play (0 when no legal
            move exists)
        """
        # nothing to play: return an empty bitboard
        if not legal_moves:
            return 0
        # pick one of the set positions and convert it back to a bitboard
        positions = get_set_bits_list(legal_moves)
        return 1 << get_random_move_from_list(positions)
Example #6
0
    def train(self, n=100):
        """Run n iterations of MCTS: select, expand, simulate, backpropagate.

        Each iteration walks down the tree from the root using UCB1,
        expands one unexplored child, plays a uniformly random rollout to
        the end of the game, and backpropagates the result to the root.

        Parameters
        ----------
        n : int (optional)
            the number of simulation steps to run
        """
        while (n):
            n -= 1
            ##############################
            ####### Selection Phase ######
            ##############################
            """select a node in the tree that is neither a leaf node
            nor fully explored"""
            # start the descent at the root (index 0 in the node list)
            e = 0
            while (True):
                node = self._node_list[e]
                if(node.total_legal_moves != \
                   node.total_children or \
                   node.terminal == 1):
                    # at least one unexplored move is present, stop the
                    # selection here
                    break
                else:
                    # since all nodes of previous node were explored at least
                    # once, we go to the next level and select the child
                    # with highest ucb1
                    next_node = None
                    best_ucb1 = -np.inf
                    for idx in node.children:
                        ucb1 = self._node_list[idx].get_ucb1(self._c)
                        if (ucb1 > best_ucb1):
                            best_ucb1 = ucb1
                            next_node = idx
                    e = next_node
            # this defaults to the root in case the else condition is not run
            node, node_idx = self._node_list[e], e

            ##############################
            ####### Expansion Phase ######
            ##############################
            """select one of the child nodes for this node which is 
            unexplored"""
            if (not node.terminal):
                """first get a random move from the moves which have not 
                been added to the mcts tree yet"""
                # m = self.get_not_added_move(node)
                # legal_moves_set was shuffled at node creation, so indexing
                # by total_children yields a random unexplored move
                m = node.legal_moves_set[node.total_children]
                # play the game and add new node to tree (node list)
                next_state, next_legal_moves, _, done = \
                                    self._env.step(node.state, 1<<m)
                node = Node(s=next_state.copy(),
                            legal_moves=next_legal_moves,
                            m=m,
                            terminal=done,
                            parent=e)
                # add node to node list
                self._node_list.append(node)
                # add the idx in this list to the parent's children list
                # NOTE(review): add_child presumably also increments the
                # parent's total_children counter -- confirm in Node class
                self._node_list[e].add_child(len(self._node_list) - 1)
                node_idx = len(self._node_list) - 1

            ##############################
            ###### Simulation Phase ######
            ##############################
            """play till the end by randomly selecting moves starting from the
            newly created node (in case of terminal node this step is skipped"""
            s = node.state
            legal_moves = node.legal_moves
            if (node.terminal != 1):
                done = 0
                while (not done):
                    # uniformly random rollout policy
                    a = get_random_move_from_list(
                        get_set_bits_list(legal_moves))
                    s, legal_moves, _, done = self._env.step(s, 1 << a)
            # winner is compared against -1 below, so -1 presumably denotes
            # a tie and 0/1 the winning coin -- TODO confirm against env
            winner = self._env.get_winner(s)

            ##############################
            #### Backpropagation Phase ###
            ##############################
            """backproagate the winner value from node (from where we started
            to play) to root to update statistical parameters for each node"""
            while (True):
                node.n += 1
                # update the value of N in children
                # (each child caches its parent's visit count for UCB1)
                for c in node.children:
                    self._node_list[c].N = node.n
                if (winner != -1):
                    # credit a win when the winner is the opponent of the
                    # player to move at this node, presumably because a
                    # node's value guides its parent's move choice
                    # -- confirm sign convention
                    node.w += (1 - winner == self._env.get_player(node.state))
                else:
                    # tie
                    node.w += 0.5
                # move one level up
                if (node.parent is None):
                    break
                else:
                    node, node_idx = self._node_list[node.parent], node.parent
Example #7
0
    def move(self,
             s,
             legal_moves,
             current_depth=0,
             get_max=1,
             alpha=-np.inf,
             beta=np.inf):
        """Select a move using depth-limited minimax with alpha-beta pruning

        Parameters
        ----------
        s : tuple
            contains black and white bitboards and current player
        legal_moves : int (64 bit)
            legal states are set to 1
        current_depth : int
            tracks the depth in the recursion
        get_max : int
            denotes whether to play as maximum/original player,
            only useful when recursion depth > 1, 1 is max and 0 is min player
        alpha : int
            tracks the maximum among all the nodes, useful for pruning
        beta : int
            tracks the minimum among all the nodes, useful for pruning

        Returns
        -------
        a : int (64 bit) or float
            at recursion depth 0, a bitboard with the selected position set;
            at deeper levels, the alpha (max player) or beta (min player)
            bound used by the caller for pruning
        """
        # max player
        # NOTE(review): self._player is recorded here but not read in this
        # method; presumably consumed by _board_heuristics -- confirm
        if (current_depth == 0):
            self._player = self._env.get_player(s)
        # get the indices of the legal moves
        move_list = get_set_bits_list(legal_moves)
        h_list = []
        m_list = []
        for m in move_list:
            # NOTE(review): legal_moves is rebound here every iteration;
            # safe because move_list was extracted above, but easy to trip on
            s_next, legal_moves, _, done = self._env.step(s, 1 << m)
            if (current_depth < self._depth and not done):
                # recurse one level deeper, flipping max/min player
                h_list.append(
                    self.move(s_next, legal_moves, current_depth + 1,
                              1 - get_max, alpha, beta))
            else:
                # depth limit or game over: score the position directly
                h_list.append(
                    self._board_heuristics(legal_moves, get_max, s_next))
            m_list.append(m)
            # print(current_depth, h_list, m, legal_moves, s, alpha, beta)
            # adjust alpha and beta
            # print(current_depth, alpha, beta, h_list[-1],
            # len(move_list), m, get_max)
            if (get_max):
                alpha = max(alpha, h_list[-1])
            else:
                beta = min(beta, h_list[-1])
            # prune: the opponent will never allow this branch
            if (beta <= alpha):
                break
        # return the best move
        if (current_depth == 0):
            return 1 << m_list[np.argmax(h_list)]
        if (get_max):
            return alpha
        else:
            return beta