示例#1
0
    def test_mcts_doesnt_mutate_state(self):
        """Check that an MCTS search leaves the searched state's board unchanged."""
        env = MancalaEnv()
        # Snapshot the board before searching so there is something to compare to.
        board_before = Board.clone(env.board)

        MCTSFactory.test_mcts().search(env)

        self.assertEqual(
            board_before.board,
            env.board.board,
            "Expect MCTS doesn't mutate the initial board",
        )
示例#2
0
    def test_from_board_works(self):
        """Check that ``Board.clone`` reports the same seed counts as its source."""
        source = Board(7, 7)
        source.set_seeds(Side.SOUTH, 5, 7)
        duplicate = Board.clone(source)

        both_sides = (Side.SOUTH, Side.NORTH)

        # Every hole on both sides must hold the same number of seeds...
        for hole in range(1, source.holes + 1):
            for side in both_sides:
                self.assertEqual(source.get_seeds(side, hole),
                                 duplicate.get_seeds(side, hole))

        # ...and so must both stores.
        for side in both_sides:
            self.assertEqual(source.get_seeds_in_store(side),
                             duplicate.get_seeds_in_store(side))
示例#3
0
    def clone(other_state):
        """Return an independent copy of ``other_state``.

        The board is duplicated with ``Board.clone`` and the remaining fields
        (``side_to_move``, ``north_moved`` and ``our_side``) are copied onto a
        freshly constructed ``MancalaEnv``.

        :param other_state: the environment to copy; it is not modified.
        :return: a new ``MancalaEnv`` carrying the same game state.
        """
        clone_game = MancalaEnv()
        clone_game.board = Board.clone(other_state.board)
        clone_game.side_to_move = deepcopy(other_state.side_to_move)
        clone_game.north_moved = deepcopy(other_state.north_moved)
        # Without this the clone keeps whatever side the constructor assigned,
        # silently dropping the side recorded on the source state.
        clone_game.our_side = deepcopy(other_state.our_side)
        return clone_game
示例#4
0
 def reset(self):
     """Put the environment back into its starting position.

     A fresh ``Board(7, 7)`` is installed, NORTH is marked as not having
     moved yet, and SOUTH becomes both the side to move and our side.
     """
     self.board = Board(7, 7)  # NOTE(review): presumably 7 holes x 7 seeds - confirm against Board
     self.north_moved = False
     self.side_to_move = self.our_side = Side.SOUTH
示例#5
0
    def make_move(board: Board, move: Move, north_moved):
        """Apply ``move`` to ``board`` in place and return the side that moves next.

        :param board: the board to update; it is mutated by this call.
        :param move: the move to play. ``move.index == 0`` is the pie move,
            any other index is the hole to sow from on ``move.side``.
        :param north_moved: whether NORTH has already made a move; used both
            for the legality check and for the extra-turn rule below.
        :return: the side which is next to move.
        :raises ValueError: if ``MancalaEnv.is_legal_action`` rejects the move.
        """
        if not MancalaEnv.is_legal_action(board, move, north_moved):
            raise ValueError(
                'Move is illegal: Board: \n {} \n Move:\n {}/{} \n {}'.format(
                    board, move.index, move.side, north_moved))

        # This is a pie move: swap the two sides of the board and hand the
        # turn to the other side without sowing anything.
        if move.index == 0:
            MancalaEnv.switch_sides(board)
            return Side.opposite(move.side)

        # Pick up every seed from the chosen hole.
        seeds_to_sow = board.get_seeds(move.side, move.index)
        board.set_seeds(move.side, move.index, 0)

        holes = board.holes
        # Place seeds in all holes excepting the opponent's store
        receiving_holes = 2 * holes + 1
        # Rounds needed to sow all the seeds
        rounds = seeds_to_sow // receiving_holes
        # Seeds remaining after all the rounds
        remaining_seeds = seeds_to_sow % receiving_holes

        # Sow the seeds for the full rounds: each full round drops one seed in
        # every hole of both sides and one in the mover's store, so the rounds
        # can be added in bulk instead of being walked seed by seed.
        if rounds != 0:
            for hole in range(1, holes + 1):
                board.add_seeds(Side.NORTH, hole, rounds)
                board.add_seeds(Side.SOUTH, hole, rounds)
            board.add_seeds_to_store(move.side, rounds)

        # Sow the remaining seeds one at a time, starting after the chosen hole.
        # ``sow_hole == 0`` is used as the marker for "the mover's store".
        sow_side = move.side
        sow_hole = move.index
        for _ in range(remaining_seeds):
            sow_hole += 1
            # Coming from the store marker (0): continue on the other side.
            if sow_hole == 1:
                sow_side = Side.opposite(sow_side)
            if sow_hole > holes:
                if sow_side == move.side:
                    # Past the last hole of the mover's own side: the seed
                    # goes into the mover's store.
                    sow_hole = 0
                    board.add_seeds_to_store(sow_side, 1)
                    continue
                else:
                    # Past the last hole of the opponent's side: skip the
                    # opponent's store and wrap to the mover's first hole.
                    sow_side = Side.opposite(sow_side)
                    sow_hole = 1
            board.add_seeds(sow_side, sow_hole, 1)

        # Capture the opponent's seeds from the opposite hole if the last seed
        # is placed in an empty hole and there are seeds in the opposite hole
        # (the hole counts as "was empty" when it now holds exactly one seed).
        if sow_side == move.side and sow_hole > 0 \
                and board.get_seeds(sow_side, sow_hole) == 1 \
                and board.get_seeds_op(sow_side, sow_hole) > 0:
            board.add_seeds_to_store(
                move.side, 1 + board.get_seeds_op(sow_side, sow_hole))
            board.set_seeds(move.side, sow_hole, 0)
            board.set_seeds_op(move.side, sow_hole, 0)

        # If the game is over, collect the seeds not in the store and put them there
        game_over = MancalaEnv.game_over(board)
        if game_over:
            # The side whose holes are all empty has finished; the other side
            # sweeps its remaining seeds into its own store.
            finished_side = Side.NORTH if MancalaEnv.holes_empty(
                board, Side.NORTH) else Side.SOUTH
            seeds = 0
            collecting_side = Side.opposite(finished_side)
            for hole in range(1, board.holes + 1):
                seeds += board.get_seeds(collecting_side, hole)
                board.set_seeds(collecting_side, hole, 0)
            board.add_seeds_to_store(collecting_side, seeds)

        # Return the side which is next to move. The extra turn is not granted
        # to SOUTH while NORTH has not moved yet.
        if sow_hole == 0 and (move.side == Side.NORTH or north_moved):
            return move.side  # Last seed was placed in the store, so side moves again
        return Side.opposite(move.side)
示例#6
0
 def switch_sides(board: Board):
     """Swap the contents of the two rows of ``board.board`` in place.

     Indices ``0`` through ``board.holes`` (inclusive) of row 0 and row 1 are
     exchanged one by one; nothing is returned.
     """
     first_row = board.board[0]
     second_row = board.board[1]
     for idx in range(board.holes + 1):
         first_row[idx], second_row[idx] = second_row[idx], first_row[idx]
示例#7
0
 def holes_empty(board: Board, side: Side):
     """Return True when none of holes ``1..board.holes`` on ``side`` holds a seed."""
     return not any(
         board.get_seeds(side, idx) > 0
         for idx in range(1, board.holes + 1)
     )
示例#8
0
class MancalaEnv(object):
    """Game environment for a two-player Mancala game with a pie rule.

    The environment bundles a ``Board`` with the bookkeeping needed to play:
    which side moves next (``side_to_move``), whether NORTH has already moved
    (``north_moved``, which controls the pie move and the first-turn rule) and
    which side we are playing (``our_side``, flipped by a pie move).

    The rules themselves live in static methods that operate on a bare
    ``Board`` so they can be used without an environment instance.
    """

    # Multipliers used by __hash__; kept on the class so the table is not
    # rebuilt on every hash computation.
    _HASH_PRIMES = (
        2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
        67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137,
        139, 149, 151, 157, 163,
    )

    def __init__(self):
        self.reset()

    @property
    def board(self):
        """The ``Board`` this environment plays on."""
        return self._board

    @board.setter
    def board(self, board: Board):
        self._board = board

    @property
    def side_to_move(self):
        """The side whose turn it is."""
        return self._side_to_move

    @side_to_move.setter
    def side_to_move(self, side: Side):
        self._side_to_move = side

    @property
    def north_moved(self):
        """Whether NORTH has already played a move."""
        return self._north_moved

    @north_moved.setter
    def north_moved(self, moved: bool):
        self._north_moved = moved

    @property
    def our_side(self):
        """The side we are playing; swapped when a pie move is performed."""
        return self._my_side

    @our_side.setter
    def our_side(self, side: Side):
        self._my_side = side

    def reset(self):
        """Put the environment back into its starting position."""
        self.board = Board(7, 7)
        self.side_to_move = Side.SOUTH
        self.north_moved = False
        self.our_side = Side.SOUTH

    @staticmethod
    def clone(other_state):
        """Return an independent copy of ``other_state``.

        :param other_state: the environment to copy; it is not modified.
        :return: a new ``MancalaEnv`` with a cloned board and the same
            ``side_to_move``, ``north_moved`` and ``our_side``.
        """
        clone_game = MancalaEnv()
        clone_game.board = Board.clone(other_state.board)
        clone_game.side_to_move = deepcopy(other_state.side_to_move)
        clone_game.north_moved = deepcopy(other_state.north_moved)
        # Without this the clone would fall back to the default side set by
        # reset(), losing the effect of an earlier pie move.
        clone_game.our_side = deepcopy(other_state.our_side)
        return clone_game

    def get_legal_moves(self) -> List[Move]:
        """Return the legal moves for the side to move in the current state."""
        return MancalaEnv.get_state_legal_actions(self.board,
                                                  self.side_to_move,
                                                  self.north_moved)

    def is_legal(self, move: Move) -> bool:
        """Return whether ``move`` is legal for ``move.side`` on the current board."""
        return MancalaEnv.is_legal_action(self.board, move, self.north_moved)

    def perform_move(self, move: Move) -> float:
        """Perform a move and return the partial reward for it.

        :param move: the move to play; index 0 is the pie move.
        :return: the change in ``move.side``'s store, divided by 100.
        :raises ValueError: if the move is illegal. In that case the
            environment is left unchanged.
        """
        seeds_in_store_before = self.board.get_seeds_in_store(move.side)
        # make_move validates the move before touching the board, so run it
        # first: an illegal pie move must not flip our_side.
        next_side = MancalaEnv.make_move(self.board, move, self.north_moved)
        if move.index == 0:  # pie move
            self.our_side = Side.opposite(self.our_side)
        self.side_to_move = next_side
        if move.side == Side.NORTH:
            self.north_moved = True
        seeds_in_store_after = self.board.get_seeds_in_store(move.side)

        # Return a partial reward proportional to the number of captured seeds.
        return (seeds_in_store_after - seeds_in_store_before) / 100.0

    def compute_final_reward(self, side: Side):
        """Return the store difference: ``side``'s store minus the opponent's store."""
        own_store = self.board.get_seeds_in_store(side)
        opponent_store = self.board.get_seeds_in_store(Side.opposite(side))
        return own_store - opponent_store

    def compute_end_game_reward(self, side: Side):
        """Return the end-of-game outcome for ``side``.

        :return: ``1`` for a win, ``0`` for a loss and ``0.5`` for a tie,
            judged by the store difference.
        :raises ValueError: if the game is not over yet.
        """
        if not self.is_game_over():
            raise ValueError(
                "compute_end_game_reward should only be called at end of the game"
            )

        reward = self.compute_final_reward(side)
        if reward > 0:
            return 1  # win
        elif reward < 0:
            return 0  # lose
        return 0.5  # tie

    def is_game_over(self) -> bool:
        """Return whether either side has no seeds left in its holes."""
        return MancalaEnv.game_over(self.board)

    def get_actions_mask(self) -> np.ndarray:
        """Return an array of 1s and 0s of length ``holes + 1``.

        A 1 at index ``i`` means that the move with index ``i`` is legal;
        index 0 is the pie move.
        """
        mask = [0] * (self.board.holes + 1)
        for action in self.get_legal_moves():
            mask[action.index] = 1
        return np.array(mask)

    def get_action_mask_with_no_pie(self) -> np.ndarray:
        """Return an array of 1s and 0s of length ``holes``, ignoring the pie move.

        A 1 at index ``i`` means that the move with index ``i + 1`` is legal.
        """
        mask = [0] * self.board.holes
        for action in self.get_legal_moves():
            if action.index != 0:  # the pie move is not considered
                mask[action.index - 1] = 1
        return np.array(mask)

    def get_winner(self) -> Side or None:
        """Return the winning side, or ``None`` if the game is tied.

        The side that still has seeds in its holes is credited with them on
        top of its store before the totals are compared.

        :raises ValueError: if the game is not over yet.
        """
        if not self.is_game_over():
            raise ValueError(
                'This method should be called only when the game is over')
        finished_side = Side.NORTH if MancalaEnv.holes_empty(
            self.board, Side.NORTH) else Side.SOUTH

        not_finished_side = Side.opposite(finished_side)
        not_finished_side_seeds = self.board.get_seeds_in_store(
            not_finished_side)
        for hole in range(1, self.board.holes + 1):
            not_finished_side_seeds += self.board.get_seeds(
                not_finished_side, hole)
        finished_side_seeds = self.board.get_seeds_in_store(finished_side)

        if finished_side_seeds > not_finished_side_seeds:
            return finished_side
        elif finished_side_seeds < not_finished_side_seeds:
            return not_finished_side
        return None

    @staticmethod
    def get_state_legal_actions(board: Board, side: Side,
                                north_moved: bool) -> List[Move]:
        """Generate the list of all legal moves for ``side`` on ``board``.

        The pie move (index 0) is offered only to NORTH and only while NORTH
        has not moved yet; every non-empty hole of ``side`` is a legal move.
        """
        # If this is the first move of NORTH, then NORTH can use the pie rule action
        legal_moves = [] if north_moved or side == Side.SOUTH else [
            Move(side, 0)
        ]
        side_row = board.board[Side.get_index(side)]
        for i in range(1, board.holes + 1):
            if side_row[i] > 0:
                legal_moves.append(Move(side, i))
        return legal_moves

    @staticmethod
    def is_legal_action(board: Board, move: Move, north_moved: bool) -> bool:
        """Return whether ``move.index`` is among the legal moves of ``move.side``."""
        return any(
            act.index == move.index
            for act in MancalaEnv.get_state_legal_actions(
                board, move.side, north_moved))

    @staticmethod
    def holes_empty(board: Board, side: Side):
        """Return True when none of holes ``1..board.holes`` on ``side`` holds a seed."""
        for hole in range(1, board.holes + 1):
            if board.get_seeds(side, hole) > 0:
                return False
        return True

    @staticmethod
    def game_over(board: Board):
        """
        :param board: The board to be analysed
        :return: True if either side has no seeds left in its holes
        """
        if MancalaEnv.holes_empty(board, Side.SOUTH):
            return True
        if MancalaEnv.holes_empty(board, Side.NORTH):
            return True
        return False

    @staticmethod
    def switch_sides(board: Board):
        """Swap rows 0 and 1 of ``board.board`` in place, indices 0..holes inclusive."""
        for hole in range(board.holes + 1):
            board.board[0][hole], board.board[1][hole] = board.board[1][
                hole], board.board[0][hole]

    @staticmethod
    def make_move(board: Board, move: Move, north_moved):
        """Apply ``move`` to ``board`` in place and return the side that moves next.

        :param board: the board to update; it is mutated by this call.
        :param move: the move to play. ``move.index == 0`` is the pie move,
            any other index is the hole to sow from on ``move.side``.
        :param north_moved: whether NORTH has already made a move; used both
            for the legality check and for the extra-turn rule.
        :return: the side which is next to move.
        :raises ValueError: if the move is illegal; the board is untouched.
        """
        if not MancalaEnv.is_legal_action(board, move, north_moved):
            raise ValueError(
                'Move is illegal: Board: \n {} \n Move:\n {}/{} \n {}'.format(
                    board, move.index, move.side, north_moved))

        # This is a pie move: swap the two sides of the board and hand the
        # turn to the other side without sowing anything.
        if move.index == 0:
            MancalaEnv.switch_sides(board)
            return Side.opposite(move.side)

        # Pick up every seed from the chosen hole.
        seeds_to_sow = board.get_seeds(move.side, move.index)
        board.set_seeds(move.side, move.index, 0)

        holes = board.holes
        # Place seeds in all holes excepting the opponent's store
        receiving_holes = 2 * holes + 1
        # Rounds needed to sow all the seeds
        rounds = seeds_to_sow // receiving_holes
        # Seeds remaining after all the rounds
        remaining_seeds = seeds_to_sow % receiving_holes

        # Sow the seeds for the full rounds: each full round drops one seed in
        # every hole of both sides and one in the mover's store.
        if rounds != 0:
            for hole in range(1, holes + 1):
                board.add_seeds(Side.NORTH, hole, rounds)
                board.add_seeds(Side.SOUTH, hole, rounds)
            board.add_seeds_to_store(move.side, rounds)

        # Sow the remaining seeds one at a time, starting after the chosen hole.
        # ``sow_hole == 0`` is used as the marker for "the mover's store".
        sow_side = move.side
        sow_hole = move.index
        for _ in range(remaining_seeds):
            sow_hole += 1
            # Coming from the store marker (0): continue on the other side.
            if sow_hole == 1:
                sow_side = Side.opposite(sow_side)
            if sow_hole > holes:
                if sow_side == move.side:
                    # Past the mover's last hole: the seed goes into the store.
                    sow_hole = 0
                    board.add_seeds_to_store(sow_side, 1)
                    continue
                else:
                    # Past the opponent's last hole: skip the opponent's store
                    # and wrap around to the mover's first hole.
                    sow_side = Side.opposite(sow_side)
                    sow_hole = 1
            board.add_seeds(sow_side, sow_hole, 1)

        # Capture the opponent's seeds from the opposite hole if the last seed
        # is placed in an empty hole and there are seeds in the opposite hole
        if sow_side == move.side and sow_hole > 0 \
                and board.get_seeds(sow_side, sow_hole) == 1 \
                and board.get_seeds_op(sow_side, sow_hole) > 0:
            board.add_seeds_to_store(
                move.side, 1 + board.get_seeds_op(sow_side, sow_hole))
            board.set_seeds(move.side, sow_hole, 0)
            board.set_seeds_op(move.side, sow_hole, 0)

        # If the game is over, collect the seeds not in the store and put them there
        game_over = MancalaEnv.game_over(board)
        if game_over:
            # The side with empty holes has finished; the other side sweeps
            # its remaining seeds into its own store.
            finished_side = Side.NORTH if MancalaEnv.holes_empty(
                board, Side.NORTH) else Side.SOUTH
            seeds = 0
            collecting_side = Side.opposite(finished_side)
            for hole in range(1, board.holes + 1):
                seeds += board.get_seeds(collecting_side, hole)
                board.set_seeds(collecting_side, hole, 0)
            board.add_seeds_to_store(collecting_side, seeds)

        # Return the side which is next to move. The extra turn is not granted
        # to SOUTH while NORTH has not moved yet.
        if sow_hole == 0 and (move.side == Side.NORTH or north_moved):
            return move.side  # Last seed was placed in the store, so side moves again
        return Side.opposite(move.side)

    def get_player_utility(self) -> float:
        """Return a heuristic evaluation of the current state.

        The value is the sum of the store score, the SOUTH-minus-NORTH capture
        score, the SOUTH-minus-NORTH double-move score and half of the
        SOUTH-minus-NORTH difference in seeds on each side, as computed by
        the module-level ``compute_*`` helpers.
        """
        store_score = compute_store_score(self)
        capture_score = compute_score_capture_by(
            self, Side.SOUTH) - compute_score_capture_by(self, Side.NORTH)
        double_move_score = compute_double_moves_score(
            self, Side.SOUTH) - compute_double_moves_score(self, Side.NORTH)
        delta_side_score = (compute_seeds_on_side(self, Side.SOUTH) -
                            compute_seeds_on_side(self, Side.NORTH)) / 2

        return store_score + capture_score + double_move_score + delta_side_score

    def next_states(self):
        """Return ``(move, state)`` pairs for every legal move.

        Each state is a clone of this environment with the move applied; this
        environment itself is not modified.
        """
        next_states = []
        for action in self.get_legal_moves():
            clone = MancalaEnv.clone(self)
            clone.perform_move(action)
            next_states.append((action, clone))
        return next_states

    def __hash__(self) -> int:
        """Hash the side to move, ``north_moved`` and both board rows.

        Each component is multiplied by its own prime and the products are
        summed, so equal states hash equally; distinct states may collide.
        """
        primes = MancalaEnv._HASH_PRIMES

        hashkey = 0
        hashkey += primes[0] * Side.get_index(self.side_to_move)
        hashkey += primes[1] * int(self.north_moved)
        for hole in range(self.board.holes + 1):
            hashkey += primes[2 + hole] * self.board.board[0][hole]
            hashkey += primes[10 + hole] * self.board.board[1][hole]
        return hashkey

    def __str__(self):
        return "%s" % self.board