Example #1
    def get_state_legal_actions(board: Board, side: Side,
                                north_moved: bool) -> List[Move]:
        # If NORTH has not moved yet, NORTH may also play the pie rule action (index 0)
        legal_moves = [] if north_moved or side == Side.SOUTH else [
            Move(side, 0)
        ]
        for i in range(1, board.holes + 1):
            if board.board[Side.get_index(side)][i] > 0:
                legal_moves.append(Move(side, i))
        return legal_moves
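For reference, these snippets all lean on a small Side/Move vocabulary that is not shown here. A minimal sketch of what those helpers could look like, inferred only from how they are used in the examples (the project's real classes may carry more behaviour):

from enum import Enum


class Side(Enum):
    NORTH = 0
    SOUTH = 1

    @staticmethod
    def opposite(side: 'Side') -> 'Side':
        # The other player's side of the board
        return Side.NORTH if side == Side.SOUTH else Side.SOUTH

    @staticmethod
    def get_index(side: 'Side') -> int:
        # Row of this side in the board representation
        return side.value


class Move:
    def __init__(self, side: Side, index: int):
        # Index 0 is reserved for the pie rule (swap); 1..holes are sowing moves
        self.side = side
        self.index = index

    def __eq__(self, other) -> bool:
        return isinstance(other, Move) and self.side == other.side and self.index == other.index

    def __hash__(self) -> int:
        return hash((self.side, self.index))

    @staticmethod
    def clone(move: 'Move') -> 'Move':
        return Move(move.side, move.index)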
Example #2
def _run_game(mcts, state):
    while True:
        msg = protocol.read_msg()
        try:
            msg_type = protocol.get_msg_type(msg)
            if msg_type == MsgType.START:
                first = protocol.interpret_start_msg(msg)
                if first:
                    move = mcts.search(state)
                    protocol.send_msg(protocol.create_move_msg(move.index))
                else:
                    state.our_side = Side.NORTH
            elif msg_type == MsgType.STATE:
                move_turn = protocol.interpret_state_msg(msg)
                state.perform_move(Move(state.side_to_move, move_turn.move))
                if not move_turn.end:
                    if move_turn.again:
                        move = mcts.search(state)
                        # Index 0 is the pie rule move: send a swap instead of a sowing move
                        if move.index == 0:
                            protocol.send_msg(protocol.create_swap_msg())
                        else:
                            protocol.send_msg(
                                protocol.create_move_msg(move.index))

                logging.info("Next side: " + str(state.side_to_move))
                logging.info("The board:\n" + str(state.board))
            elif msg_type == MsgType.END:
                break
            else:
                logging.warning("Not sure what I got " + str(msg_type))
        except InvalidMessageException as e:
            logging.error(str(e))
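The protocol module itself is not reproduced here. A rough sketch of the message types and the exception this loop relies on, with everything beyond the names used above being an assumption:

from enum import Enum


class MsgType(Enum):
    START = 0   # tells the engine whether it moves first
    STATE = 1   # a move was played; carries the move index plus "again" and "end" flags
    END = 2     # the game is over


class InvalidMessageException(Exception):
    """Raised by the protocol helpers when an incoming message cannot be parsed."""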
Example #3
    def expand(self, node: AlphaNode):
        # Tactical workaround: drop the pie move so expansion only considers
        # the network's distribution over sowing moves (indices 1..holes)
        pie_move = Move(node.state.side_to_move, 0)
        if pie_move in node.unexplored_moves:
            node.unexplored_moves.remove(pie_move)

        dist, value = self.network.evaluate_state(node.state)
        for index, prior in enumerate(dist):
            expansion_move = Move(node.state.side_to_move, index + 1)
            if node.state.is_legal(expansion_move):
                child_state = MancalaEnv.clone(node.state)
                child_state.perform_move(expansion_move)
                child_node = AlphaNode(state=child_state,
                                       prior=prior,
                                       move=expansion_move,
                                       parent=node)
                node.put_child(child_node)
        # go down the tree
        return node_utils.select_child_with_maximum_action_value(node)
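node_utils.select_child_with_maximum_action_value is not shown in these excerpts. Since each AlphaNode stores a prior from the network, a PUCT-style rule is one plausible implementation; the sketch below assumes children, visits, total_value and prior attributes on the node and is not the project's actual code:

import math


def select_child_with_maximum_action_value(node, c_puct: float = 1.0):
    # PUCT: mean action value plus an exploration bonus weighted by the network prior
    best_child, best_score = None, -float('inf')
    for child in node.children:
        q = child.total_value / child.visits if child.visits > 0 else 0.0
        u = c_puct * child.prior * math.sqrt(node.visits + 1) / (1 + child.visits)
        if q + u > best_score:
            best_child, best_score = child, q + u
    return best_child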
Example #4
    def _run(self) -> Rollout:
        # Randomly choose which side the agent in training plays this game
        self.trainer_side = Side.SOUTH if random.randint(
            0, 1) == 0 else Side.NORTH
        # Reset the environment so everything is in a clean state.
        self.env.reset()

        rollout = Rollout()
        while not self.env.is_game_over():
            # If only one legal action remains, there is no real decision to make.
            # Taking it automatically is treated as part of the environment's dynamics
            # rather than as an agent choice; empirically, this made the agent's
            # learning more numerically stable.
            if len(self.env.get_legal_moves()) == 1:
                action_left_to_perform = self.env.get_legal_moves()[0]
                self.env.perform_move(action_left_to_perform)
                continue

            if self.env.side_to_move == self.trainer_side:
                # If the agent plays as NORTH, its input is the flipped board
                flip_board = self.env.side_to_move == Side.NORTH
                state = self.env.board.get_board_image(flipped=flip_board)
                mask = self.env.get_action_mask_with_no_pie()

                action, value = self.ac_net.sample(state, mask)
                # The pie move (index 0) is excluded from the mask, so network
                # action indices must be shifted by one to get board indices
                reward = self.env.perform_move(
                    Move(self.trainer_side, action + 1))
                rollout.add(state, action, reward, value, mask)
            else:
                assert self.env.side_to_move == Side.opposite(
                    self.trainer_side)
                action = self.opp_agent.produce_action(
                    self.env.board.get_board_image(),
                    self.env.get_action_mask_with_no_pie(),
                    self.env.side_to_move)
                self.env.perform_move(Move(self.env.side_to_move, action + 1))

        # We replace the partial reward of the last move with the final reward of the game
        final_reward = self.env.compute_final_reward(self.trainer_side)
        rollout.update_last_reward(final_reward)

        if self.env.get_winner() == self.trainer_side:
            rollout.add_win()
        return rollout
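Rollout is used above as a plain trajectory buffer with add, update_last_reward and add_win. A minimal sketch consistent with that usage; the field names are chosen for illustration and may differ from the project's class:

class Rollout:
    def __init__(self):
        self.states, self.actions, self.rewards = [], [], []
        self.values, self.masks = [], []
        self.wins = 0

    def add(self, state, action, reward, value, mask):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.values.append(value)
        self.masks.append(mask)

    def update_last_reward(self, reward):
        # Replace the shaped reward of the final move with the game's final reward
        if self.rewards:
            self.rewards[-1] = reward

    def add_win(self):
        self.wins += 1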
Example #5
    def test_move_has_required_effects(self):
        self.game.perform_move(Move(Side.SOUTH, 5))
        self.assertEqual(self.game.board.get_seeds(Side.SOUTH, 5), 0)
        self.assertEqual(self.game.board.get_seeds(Side.SOUTH, 6), 8)
        self.assertEqual(self.game.board.get_seeds(Side.SOUTH, 7), 8)
        self.assertEqual(self.game.board.get_seeds_in_store(Side.SOUTH), 1)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 1), 8)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 2), 8)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 3), 8)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 4), 8)

        self.game.perform_move(Move(Side.NORTH, 4))
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 4), 0)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 5), 8)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 6), 8)
        self.assertEqual(self.game.board.get_seeds(Side.NORTH, 7), 8)
        self.assertEqual(self.game.board.get_seeds_in_store(Side.NORTH), 1)
        self.assertEqual(self.game.board.get_seeds(Side.SOUTH, 1), 8)
        self.assertEqual(self.game.board.get_seeds(Side.SOUTH, 2), 8)
        self.assertEqual(self.game.board.get_seeds(Side.SOUTH, 3), 8)
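The seed counts above correspond to the standard 7-hole, 7-seed start, so the fixture behind these tests is presumably just a fresh environment; a sketch of such a setUp (the actual fixture may do more):

    def setUp(self):
        # Fresh game: 7 holes per side, 7 seeds per hole, SOUTH to move first
        self.game = MancalaEnv()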
Example #6
    def simulate(self, root: AlphaNode) -> MancalaEnv:
        """
        Runs a rollout from the given node to the end of the game, sampling
        moves from the policy network at every step.

        :param root: the starting node for the simulation
        :return: the terminal state reached by the rollout
        """
        node: AlphaNode = AlphaNode.clone(root)
        while not node.is_terminal():
            move_index, _ = self.network.sample_state(node.state)
            # The network's indices skip the pie move, so shift by one
            move = Move(node.state.side_to_move, move_index + 1)
            node.state.perform_move(move)

        return node.state
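In the surrounding search loop, the terminal state returned here would then be scored from the perspective of the node that started the rollout, for example with the compute_final_reward helper seen in Example #4. A sketch of such a call site, where leaf is a hypothetical name for the node being evaluated:

terminal_state = self.simulate(leaf)
# Positive if the side to move at the leaf ended up winning the rollout
reward = terminal_state.compute_final_reward(leaf.state.side_to_move)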
Example #7
    def pg_train_policy(self):
        flip_board = self.env.side_to_move == Side.NORTH
        state = self.env.board.get_board_image(flipped=flip_board)
        valid_actions_mask = self.env.get_actions_mask()

        action = self.agent.sample_action(state, valid_actions_mask)

        seeds_in_store_before = self.env.board.get_seeds_in_store(
            self.agent_side)
        self.env.perform_move(Move(self.agent_side, action))

        seeds_in_store_after = self.env.board.get_seeds_in_store(
            self.agent_side)
        # Shaped intermediate reward: seeds banked into the store by this move, scaled down
        reward = (seeds_in_store_after - seeds_in_store_before) / 10.0
        self.agent.store_rollout(state, action, reward, valid_actions_mask)
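As a worked example of this shaping: a move that banks three seeds into the agent's store yields a reward of 0.3, and a move that banks nothing yields 0.0; the division by 10 simply keeps the per-move signal small.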
Example #8
    def simulate(self, root: Node) -> MancalaEnv:
        node = Node.clone(root)
        while not node.is_terminal():
            legal_moves = node.state.get_legal_moves()
            moves = [-1e80 for _ in range(node.state.board.holes + 1)]
            for move in legal_moves:
                moves[move.index] = evaluation.get_score(
                    state=self._make_temp_child(node, move),
                    parent_side=node.state.side_to_move)

            moves_dist = np.asarray(moves, dtype=np.float64).flatten()
            exp = np.exp(moves_dist - np.max(moves_dist))
            dist = exp / np.sum(exp)

            move_to_make = int(np.random.choice(range(len(moves)), p=dist))
            node.state.perform_move(
                Move(side=node.state.side_to_move, index=move_to_make))

        return node.state
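Two details of this rollout policy are worth calling out: subtracting np.max before exponentiating is the standard numerically stable softmax and does not change the resulting distribution, and because illegal moves keep the -1e80 filler score their exponentials underflow to exactly zero, so np.random.choice can never select them.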
Example #9
def _run_game(player: Player, state: MancalaEnv):
    our_agent_states = []
    their_agent_states = []
    both_agent_states = []
    our_side = Side.SOUTH
    while True:
        msg = protocol.read_msg()
        try:
            msg_type = protocol.get_msg_type(msg)
            if msg_type == MsgType.START:
                first = protocol.interpret_start_msg(msg)
                if first:
                    move = player.get_play(state)
                    protocol.send_msg(protocol.create_move_msg(move.index))
                else:
                    our_side = Side.NORTH
            elif msg_type == MsgType.STATE:
                move_turn = protocol.interpret_state_msg(msg)
                if move_turn.move == 0:
                    # The opponent played the pie rule move, so the sides swap
                    our_side = Side.opposite(our_side)

                move_to_perform = Move(state.side_to_move, move_turn.move)

                observed_state = ObservedState(state=state,
                                               action_taken=move_to_perform)
                both_agent_states.append(observed_state)
                if state.side_to_move == our_side:
                    our_agent_states.append(observed_state)
                else:
                    their_agent_states.append(observed_state)

                state.perform_move(move_to_perform)
                if not move_turn.end:
                    if move_turn.again:
                        move = player.get_play(state)
                        # Index 0 is the pie rule move: send a swap instead of a sowing move
                        if move.index == 0:
                            protocol.send_msg(protocol.create_swap_msg())
                        else:
                            protocol.send_msg(
                                protocol.create_move_msg(move.index))

            elif msg_type == MsgType.END:
                args = parser.parse_args()
                run_id = '%06d' % args.run_number
                run_category = args.category

                _our_agent_file_path = _checkpoint_file_path + "/our-agent/" + run_category + run_id
                _their_agent_file_path = _checkpoint_file_path + "/their-agent/" + run_category + run_id
                _both_agent_file_path = _checkpoint_file_path + "/both-agent/" + run_category + run_id

                np.save(file=_our_agent_file_path,
                        arr=np.array(our_agent_states))
                np.save(file=_their_agent_file_path,
                        arr=np.array(their_agent_states))
                np.save(file=_both_agent_file_path,
                        arr=np.array(both_agent_states))
                break
            else:
                print("Not sure what I got " + str(msg_type))
        except InvalidMessageException as _e:
            print(str(_e))
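The saved arrays contain ObservedState objects rather than plain numbers, so NumPy stores them as pickled object arrays. Reading them back therefore needs allow_pickle=True; a usage sketch with an illustrative path:

import numpy as np

states = np.load("checkpoints/our-agent/training000001.npy", allow_pickle=True)
for observed in states:
    print(observed.state.board, observed.action_taken.index)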
Example #10
    def __init__(self, state: MancalaEnv, action_taken: Move):
        # Deep-copy both so the recorded snapshot cannot be mutated by later moves
        self.state = MancalaEnv.clone(state)
        self.action_taken = Move.clone(action_taken)
Example #11
    def test_side_to_move_changes_after_first_move(self):
        # SOUTH's opening move ends in its own store, but the turn still passes
        # to NORTH so that NORTH can decide whether to use the pie rule
        self.game.perform_move(Move(Side.SOUTH, 1))
        self.assertEqual(self.game.side_to_move, Side.NORTH)
Example #12
    def test_legal_moves_contains_all_moves(self):
        # SOUTH starts with 7 sowing moves; after SOUTH's first move, NORTH has
        # 7 sowing moves plus the pie rule move, hence 8
        self.assertEqual(len(set(self.game.get_legal_moves())), 7)
        self.game.perform_move(Move(Side.SOUTH, 3))
        self.assertEqual(len(set(self.game.get_legal_moves())), 8)
Example #13
    def test_is_legal_move_returns_true_for_the_pie_rule2(self):
        env = MancalaEnv()
        env.perform_move(Move(Side.SOUTH, 5))
        self.assertTrue(env.is_legal(Move(Side.NORTH, 0)))
Example #14
    def test_is_legal_move_returns_true_for_the_pie_rule(self):
        board = self.game.board
        MancalaEnv.make_move(board, Move(Side.SOUTH, 6), False)
        self.assertTrue(MancalaEnv.is_legal_action(board, Move(Side.NORTH, 0), False))
Example #15
    def test_cloning_immutability(self):
        clone = MancalaEnv.clone(self.game)
        self.game.perform_move(Move(Side.SOUTH, 3))

        self.assertEqual(clone.board.get_seeds(Side.SOUTH, 3), 7)
        self.assertEqual(clone.side_to_move, Side.SOUTH)
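The immutability checked by this test is usually obtained by deep-copying the environment in clone. A sketch of one way to implement it, assuming nothing in the environment needs special copy handling:

import copy


class MancalaEnv:
    # (rest of the class omitted)

    @staticmethod
    def clone(other: 'MancalaEnv') -> 'MancalaEnv':
        # A deep copy keeps the clone fully independent of later moves on the original
        return copy.deepcopy(other)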