示例#1
0
文件: board.py 项目: khoa698/kalahbot
    def set_seeds_op(self, side: Side, hole: int, seeds: int):
        """Overwrite the seed count of the hole facing ``hole`` across the board.

        The slot written is ``self.holes + 1 - hole`` in the row that belongs
        to ``Side.opposite(side)``.

        :param side: side from whose point of view ``hole`` is numbered
        :param hole: hole number, from 1 to ``self.holes`` inclusive
        :param seeds: new seed count; must not be negative
        :raises ValueError: if ``hole`` is out of range or ``seeds`` is negative
        """
        if hole < 1 or hole > self.holes:
            raise ValueError(
                'Hole number must be between 1 and number of holes')
        if seeds < 0:
            raise ValueError('There has to be a non-negative number of seeds')

        facing_row = self.board[Side.get_index(Side.opposite(side))]
        facing_hole = self.holes + 1 - hole
        facing_row[facing_hole] = seeds
示例#2
0
文件: board.py 项目: khoa698/kalahbot
 def clone(cls, original_board):
     """Build an independent copy of ``original_board``.

     Every slot of every row is copied, including slot 0 of each row, so
     the copy carries the same store contents as the original and not only
     the same hole contents.

     :param original_board: board to copy; must expose ``holes`` and ``board``
     :return: a new ``cls`` instance whose rows hold copies of the original's
         values and share no row with it
     """
     holes = original_board.holes
     board = cls(holes, 0)
     # Copy whole rows in place: a loop over holes 1..holes alone would skip
     # slot 0 and leave the copy's stores at the constructor's zero.
     for copy_row, source_row in zip(board.board, original_board.board):
         copy_row[:] = deepcopy(source_row)
     return board
示例#3
0
文件: board.py 项目: khoa698/kalahbot
    def __str__(self):
        """Render the board as two newline-terminated lines of text.

        The first line shows slot 0 of the NORTH row, ``--``, then the NORTH
        holes from ``self.holes`` down to 1.  The second line shows the SOUTH
        holes from 1 up to ``self.holes``, ``--`` and slot 0 of the SOUTH row.
        """
        north_row = self.board[Side.get_index(Side.NORTH)]
        south_row = self.board[Side.get_index(Side.SOUTH)]

        north_line = " ".join(
            [str(north_row[0]), "--"]
            + [str(north_row[hole]) for hole in range(self.holes, 0, -1)])

        # Each SOUTH hole is preceded by a single space, so the line starts
        # with one.
        south_line = "".join(
            " " + str(south_row[hole]) for hole in range(1, self.holes + 1))
        south_line += " --  " + str(south_row[0])

        return north_line + "\n" + south_line + "\n"
示例#4
0
文件: board.py 项目: khoa698/kalahbot
    def __init__(self, holes: int, seeds: int):
        """Create a board with ``holes`` holes per side, ``seeds`` in each hole.

        ``self.board`` holds two rows of ``holes + 1`` slots.  Slots 1 to
        ``holes`` of the NORTH and SOUTH rows start at ``seeds``; slot 0 of
        each row starts at zero.

        :param holes: number of holes per side; at least 1
        :param seeds: initial seeds per hole; must not be negative
        :raises ValueError: if ``holes`` is below 1 or ``seeds`` is negative
        """
        if holes < 1:
            raise ValueError('There has to be at least one hole')
        if seeds < 0:
            raise ValueError('There has to be a non-negative number of seeds')

        self._holes = holes

        # Start from all-zero rows, then fill everything after slot 0.
        self.board = [[0] * (holes + 1) for _ in range(2)]
        for playing_side in (Side.NORTH, Side.SOUTH):
            row = self.board[Side.get_index(playing_side)]
            row[1:] = [seeds] * holes
示例#5
0
 def update_env_after_move(board: Board, move: Move, north_moved):
     """Apply ``move`` to ``board`` in place and return the side that plays next.

     A move with index 0 swaps the sides of the board and passes the turn.
     Any other move sows the seeds of the chosen hole, performs a capture
     when the last seed lands in an empty hole of the mover whose facing
     hole is not empty, and, once the game is finished, moves the seeds
     still on the board into a store.

     :param board: board to update (mutated)
     :param move: move to apply; ``move.side`` and ``move.index`` are read
     :param north_moved: passed to ``is_permitted`` and used to decide
         whether a move ending in the mover's store grants another turn
     :return: ``move.side`` when the mover plays again, otherwise the
         opposite side
     :raises Exception: if ``KalahEnvironment.is_permitted`` rejects the move
     """
     if not KalahEnvironment.is_permitted(board, move, north_moved):
         raise Exception('Move not permitted')
     # Index 0 is not a sowing move: swap the sides and hand over the turn.
     if move.index == 0:
         KalahEnvironment.swap_sides(board)
         return Side.opposite(move.side)
     # Pick up every seed from the chosen hole.
     seeds_to_sow = board.get_seeds(move.side, move.index)
     board.set_seeds(move.side, move.index, 0)
     holes = board.holes
     # One lap visits every hole on both sides plus the mover's own store.
     receiving_holes = 2 * holes + 1
     rounds = seeds_to_sow // receiving_holes
     remaining_seeds = seeds_to_sow % receiving_holes
     # Full laps are applied in bulk: each receiving slot gets one seed per lap.
     if rounds != 0:
         for hole in range(1, holes + 1):
             board.add_seeds(Side.NORTH, hole, rounds)
             board.add_seeds(Side.SOUTH, hole, rounds)
         board.add_seeds_to_store(move.side, rounds)
     # Sow the leftover seeds one at a time, starting after the chosen hole.
     sow_side = move.side
     sow_hole = move.index
     for _ in range(remaining_seeds):
         sow_hole += 1
         # sow_hole was 0 (the previous seed went into the mover's store), so
         # sowing continues on the other side.
         if sow_hole == 1:
             sow_side = Side.opposite(sow_side)
         if sow_hole > holes:
             if sow_side == move.side:
                 # Past the mover's last hole: the seed goes into the mover's
                 # store, recorded as hole 0.
                 sow_hole = 0
                 board.add_seeds_to_store(sow_side, 1)
                 continue
             else:
                 # Past the other side's last hole: no seed goes into that
                 # store; wrap around to the mover's first hole.
                 sow_side = Side.opposite(sow_side)
                 sow_hole = 1
         board.add_seeds(sow_side, sow_hole, 1)
     # Capture: the last seed landed in a hole of the mover that now holds
     # exactly one seed while the facing hole is not empty; both holes are
     # emptied into the mover's store.
     if sow_side == move.side and sow_hole > 0 and board.get_seeds(sow_side, sow_hole) == 1 \
             and board.get_seeds_op(sow_side, sow_hole) > 0:
         board.add_seeds_to_store(move.side, 1 + board.get_seeds_op(sow_side, sow_hole))
         board.set_seeds(move.side, sow_hole, 0)
         board.set_seeds_op(move.side, sow_hole, 0)
     game_over = KalahEnvironment.game_finished(board)
     if game_over:
         # The side whose holes are empty is the finished side; the other
         # side sweeps its remaining seeds into its own store.
         finished_side = Side.NORTH if KalahEnvironment.side_has_no_seeds(board, Side.NORTH) else Side.SOUTH
         seeds = 0
         collecting_side = Side.opposite(finished_side)
         for hole in range(1, board.holes + 1):
             seeds += board.get_seeds(collecting_side, hole)
             board.set_seeds(collecting_side, hole, 0)
         board.add_seeds_to_store(collecting_side, seeds)
     # Ending in the own store gives another turn, except for a SOUTH move
     # made while north_moved is still false.
     if sow_hole == 0 and (move.side == Side.NORTH or north_moved):
         return move.side
     return Side.opposite(move.side)
示例#6
0
def h1(state: KalahEnvironment, side: Side) -> float:
    """Heuristic: store lead of ``side`` over its opponent in ``state``.

    :param state: environment whose ``board`` is inspected
    :param side: side from whose point of view the lead is measured
    :return: seeds in the store of ``side`` minus seeds in the store of
        ``Side.opposite(side)``
    """
    board = state.board
    return (board.get_seeds_in_store(side)
            - board.get_seeds_in_store(Side.opposite(side)))
示例#7
0
文件: board.py 项目: khoa698/kalahbot
    def add_seeds(self, side: Side, hole: int, seeds: int):
        """Increase the seed count of one hole of ``side`` by ``seeds``.

        :param side: side that owns the hole
        :param hole: hole number, from 1 to ``self.holes`` inclusive
        :param seeds: number of seeds to add; must not be negative
        :raises ValueError: if ``hole`` is out of range or ``seeds`` is negative
        """
        if hole < 1 or hole > self.holes:
            raise ValueError(
                'Hole number must be between 1 and number of holes')
        if seeds < 0:
            raise ValueError('There has to be a non-negative number of seeds')

        side_row = self.board[Side.get_index(side)]
        side_row[hole] += seeds
示例#8
0
 def get_winner(self) -> Side or None:
     """Return the side that won the finished game, or ``None`` for a draw.

     The side whose holes are empty (NORTH if NORTH has no seeds, otherwise
     SOUTH) is scored by its store alone.  The other side is scored by its
     store plus every seed still lying in its holes.

     :raises Exception: if ``self.has_game_ended()`` is false
     """
     if not self.has_game_ended():
         raise Exception('Game has not ended')

     north_is_empty = KalahEnvironment.side_has_no_seeds(self.board, Side.NORTH)
     emptied_side = Side.NORTH if north_is_empty else Side.SOUTH
     remaining_side = Side.opposite(emptied_side)

     remaining_total = self.board.get_seeds_in_store(remaining_side) + sum(
         self.board.get_seeds(remaining_side, hole)
         for hole in range(1, self.board.holes + 1))
     emptied_total = self.board.get_seeds_in_store(emptied_side)

     if emptied_total > remaining_total:
         return emptied_side
     if emptied_total < remaining_total:
         return remaining_side
     return None
示例#9
0
def env_runner(env, trainer_side, ac_net, opp_agent):
    """Play one game to its end and return the trainer's rollout.

    Moves of ``trainer_side`` are sampled from ``ac_net`` and recorded in
    the rollout; moves of the other side come from ``opp_agent`` and are
    not recorded.  A turn with a single valid move is played automatically
    for whichever side is to move and is not recorded either.

    :param env: game environment; must provide ``has_game_ended``,
        ``get_valid_moves``, ``do_move``, ``get_mask``,
        ``calculate_score_diff``, ``get_winner``, ``side_to_play`` and
        ``board``
    :param trainer_side: side played by ``ac_net``
    :param ac_net: object whose ``sample(state, mask)`` returns an
        ``(action, value)`` pair
    :param opp_agent: object whose ``produce_action(board_image, mask, side)``
        returns the opponent's action
    :return: the ``Rollout`` holding the trainer's recorded steps, with its
        last reward replaced by the final score difference and a win added
        when the trainer won
    """
    rollout = Rollout()

    while not env.has_game_ended():
        # There is no choice if only one action is left. Taking that action
        # automatically must be seen as a characteristic behaviour of the
        # environment. This helped the learning of the agent to be more
        # numerically stable (this is an empirical observation).
        valid_moves = env.get_valid_moves()
        if len(valid_moves) == 1:
            env.do_move(valid_moves[0])
            continue

        if env.side_to_play == trainer_side:
            # If the agent is playing as NORTH, its input is a flipped board.
            flip_board = env.side_to_play == Side.NORTH
            state = env.board.get_board_image(flipped=flip_board)
            mask = env.get_mask()

            action, value = ac_net.sample(state, mask)
            # Because the pie move with index 0 is ignored, the action
            # indexes must be shifted by one.
            reward = env.do_move(Move(trainer_side, action + 1))
            rollout.add(state, action, reward, value, mask)
        else:
            assert env.side_to_play == Side.opposite(trainer_side)
            action = opp_agent.produce_action(env.board.get_board_image(),
                                              env.get_mask(), env.side_to_play)
            env.do_move(Move(env.side_to_play, action + 1))

    # Replace the partial reward of the last recorded move with the final
    # reward of the game.
    final_reward = env.calculate_score_diff(trainer_side)
    rollout.update_last_reward(final_reward)

    if env.get_winner() == trainer_side:
        rollout.add_win()
    return rollout
示例#10
0
 def calculate_score_diff(self, side: Side):
     """Return the store of ``side`` minus the store of the opposite side.

     :param side: side from whose point of view the difference is taken
     """
     own_store = self.board.get_seeds_in_store(side)
     opponent_store = self.board.get_seeds_in_store(Side.opposite(side))
     return own_store - opponent_store
示例#11
0
文件: board.py 项目: khoa698/kalahbot
 def get_seeds(self, side: Side, hole: int) -> int:
     """Return the number of seeds in one hole of ``side``.

     :param side: side that owns the hole
     :param hole: hole number, from 1 to ``self.holes`` inclusive
     :raises ValueError: if ``hole`` is out of range
     """
     if hole < 1 or hole > self.holes:
         raise ValueError(
             'Hole number must be between 1 and number of holes')
     side_row = self.board[Side.get_index(side)]
     return side_row[hole]
示例#12
0
 def do_move(self, move: Move):
     """Apply ``move`` to this environment's board and advance the turn.

     The board update runs first.  ``update_env_after_move`` raises for a
     move that is not permitted, and in that case no attribute of this
     environment has been changed yet.

     :param move: move to play; index 0 also flips ``self.my_side``
     :raises Exception: propagated from
         ``KalahEnvironment.update_env_after_move`` for a move that is not
         permitted
     """
     # north_has_moved must still hold its value from before this move when
     # the update decides who plays next, so it is only set afterwards.
     next_side = KalahEnvironment.update_env_after_move(self.board, move, self.north_has_moved)
     if move.index == 0:
         self.my_side = Side.opposite(self.my_side)
     self.side_to_play = next_side
     if move.side == Side.NORTH:
         self.north_has_moved = True
示例#13
0
文件: board.py 项目: khoa698/kalahbot
 def get_seeds_op(self, side: Side, hole: int):
     """Return the seed count of the hole facing ``hole`` across the board.

     The slot read is ``self.holes + 1 - hole`` in the row that belongs to
     ``Side.opposite(side)``.

     :param side: side from whose point of view ``hole`` is numbered
     :param hole: hole number, from 1 to ``self.holes`` inclusive
     :raises ValueError: if ``hole`` is out of range
     """
     if hole < 1 or hole > self.holes:
         raise ValueError(
             'Hole number must be between 1 and number of holes')
     facing_row = self.board[Side.get_index(Side.opposite(side))]
     facing_hole = self.holes + 1 - hole
     return facing_row[facing_hole]
示例#14
0
 def get_valid_moves_at_state(board: Board, side: Side, north_moved: bool) -> List[Move]:
     """List the moves ``side`` may play on ``board``.

     The move with index 0 is offered only to a side other than SOUTH and
     only while ``north_moved`` is false.  It is followed by one move for
     every hole of ``side`` that holds at least one seed, in hole order.

     :param board: board to inspect
     :param side: side whose moves are listed
     :param north_moved: whether NORTH has already made a move
     :return: the valid moves; empty when none applies
     """
     valid_moves = []
     # Constants and helpers are reached through the Side class and the
     # Board accessor rather than through the ``side`` instance and the raw
     # ``board.board`` rows.
     if not north_moved and side != Side.SOUTH:
         valid_moves.append(Move(side, 0))
     valid_moves.extend(
         Move(side, hole)
         for hole in range(1, board.holes + 1)
         if board.get_seeds(side, hole) > 0)
     return valid_moves
示例#15
0
 def __repr__(self) -> str:
     """Return a text form showing the side's label and the hole index."""
     side_label = Side.side_to_str(self.side)
     return "Side: %s; Hole: %d" % (side_label, self.index)
示例#16
0
 def __hash__(self) -> int:
     """Combine the side's index (weighted by ten) with the hole index."""
     side_offset = 10 * Side.get_index(self.side)
     return side_offset + self.index
示例#17
0
 def __eq__(self, other):
     """Return whether ``other`` is the same kind of object with equal fields.

     Two objects are equal when ``other`` is an instance of this object's
     class and both ``side`` and ``index`` match.  Comparing ``index`` is
     what tells apart two moves of the same side on different holes.

     :param other: object to compare against
     :return: ``True`` when equal, ``False`` otherwise (including when
         ``other`` is of another type)
     """
     if not isinstance(other, self.__class__):
         return False
     return self.side == other.side and self.index == other.index
示例#18
0
文件: board.py 项目: khoa698/kalahbot
 def set_seeds_in_store(self, side: Side, seeds: int):
     """Overwrite the store of ``side`` (slot 0 of its row) with ``seeds``.

     :param side: side whose store is set
     :param seeds: new seed count; must not be negative
     :raises ValueError: if ``seeds`` is negative
     """
     if seeds < 0:
         raise ValueError('There has to be a non-negative number of seeds')
     side_row = self.board[Side.get_index(side)]
     side_row[0] = seeds
示例#19
0
文件: board.py 项目: khoa698/kalahbot
 def get_seeds_in_store(self, side: Side):
     """Return the seed count in the store of ``side`` (slot 0 of its row).

     :param side: side whose store is read
     """
     side_row = self.board[Side.get_index(side)]
     return side_row[0]