Example #1
def print_graph(self, state_manager):
    """
    Print the DiGraph object representing the current tree
    """
    pos = nx.shell_layout(self.graph)
    blue_player_nodes = []
    red_player_nodes = []
    labels = {}
    state_manager = StateManager(state_manager.board_size,
                                 state_manager.current_player())
    for state in self.graph.nodes:
        state_manager.set_state_manager(state)
        labels[state] = state_manager.pretty_state_string()
        if StateManager.get_player(state) == 1:
            blue_player_nodes.append(state)
        else:
            red_player_nodes.append(state)
    nx.draw_networkx_nodes(
        self.graph,
        pos,
        nodelist=blue_player_nodes,
        node_color=TreeConstants.PLAYER1_COLOR,
        alpha=0.5,
    )
    nx.draw_networkx_nodes(
        self.graph,
        pos,
        nodelist=red_player_nodes,
        node_color=TreeConstants.PLAYER2_COLOR,
        alpha=0.5,
    )
    nx.draw_networkx_edges(self.graph, pos)
    nx.draw_networkx_labels(self.graph, pos, labels, font_size=10)
    plt.show()
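
Below is a minimal, self-contained sketch of the same networkx drawing calls used above (shell_layout, draw_networkx_nodes/edges/labels). The tiny graph, node names and colors are illustrative assumptions, not part of the project.

import matplotlib.pyplot as plt
import networkx as nx

# Hypothetical two-level game tree; node names and colors are made up for illustration.
graph = nx.DiGraph()
graph.add_edges_from([("root", "a"), ("root", "b"), ("a", "c")])

pos = nx.shell_layout(graph)
labels = {node: node.upper() for node in graph.nodes}

nx.draw_networkx_nodes(graph, pos, nodelist=["root", "c"], node_color="blue", alpha=0.5)
nx.draw_networkx_nodes(graph, pos, nodelist=["a", "b"], node_color="red", alpha=0.5)
nx.draw_networkx_edges(graph, pos)
nx.draw_networkx_labels(graph, pos, labels, font_size=10)
plt.show()
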
Example #2
def __init__(
    self,
    state_manager: StateManager,
    actor_net,
    max_tree_height=5,
    c=1,
    number_of_simulations=10,
    verbose=False,
    random_simulation_rate=0.2,
):
    self.state_manager = StateManager(state_manager.board_size,
                                      state_manager.current_player())
    self.tree = StateTree(self.state_manager.get_state())
    self.tree.add_state_node(self.tree.root_state,
                             self.state_manager.is_end_state())
    self.c = c
    self.max_tree_height = max_tree_height
    self.actor_net = actor_net
    self.number_of_simulations = number_of_simulations
    self.verbose = verbose
    self.random_simulation_rate = random_simulation_rate
Example #3
class GameVisualizer:
    def __init__(
        self,
        board_size,
        player1=None,
        player2=None,
        starting_player=1,
        random_play=False,
        frame_rate=1000,
        initial_state=None,
        cartesian_cords=True,
    ):
        # Game logic
        self.board_size = board_size
        self.initial_state = initial_state
        self.state_manager = StateManager(board_size, starting_player)
        if initial_state:
            self.state_manager.set_state_manager(initial_state)
        self.random_play = random_play

        # Setting players
        self.player1 = player1
        self.player2 = player2

        # WINDOW
        self.master = Tk()
        self.master.title("HexGameVisualizer")
        self.master.protocol("WM_DELETE_WINDOW", self.quit_application)

        self.action_input = Entry(self.master)
        self.action_input.bind("<Return>", lambda event: self.button_clicked())
        self.action_input.pack()

        self.perform_action_button = Button(
            self.master, text="perform move", command=self.button_clicked
        )
        self.perform_action_button.pack()

        # TODO: Add label to describe the players currently playing
        self.label = Label(self.master)
        self.label.pack()

        self.start_pos = (60, 30)
        self.canvas = Canvas(
            self.master,
            width=self.start_pos[0] + self.board_size * 55 + self.start_pos[0],
            height=self.start_pos[1] + self.board_size * 33 + self.start_pos[1],
        )
        self.canvas.pack()

        # CONSTANTS
        self.frame_rate = frame_rate
        self.border_size = 10
        self.counter = 0
        self.size = 20
        self.cartesian_cords = cartesian_cords

        # LISTS CONTROLLING GAME AND DRAWING OF BOARD
        self.board = []
        self.board_border = []
        self.actions = []
        self.player_pieces = []

    def quit_application(self):
        import sys

        self.master.quit()
        sys.exit()

    def add_action(self, action: str):
        self.actions.append(action)

    def preprocess_actions(self):
        new_actions = []
        for action in self.actions:
            new_actions.append(GameVisualizer.preprocess_action(action))
        return new_actions

    @staticmethod
    def preprocess_action(action: str):
        positions, player = action.split(":")
        x_pos, y_pos = positions.split(",")
        return int(x_pos), int(y_pos), int(player)

    def run(self):
        self.actions = self.preprocess_actions()
        self.build_and_draw_board()
        if self.initial_state:
            self.state_manager.set_state_manager(self.initial_state)
            self.draw_initial_state()
        if len(self.actions):
            self.master.after(self.frame_rate, self.draw)
        mainloop()

    def model_perform_action(self, model: ANET):
        print(self.state_manager.get_state())
        distribution = model.predict(self.state_manager.get_state())
        print(distribution)
        argmax_distribution_index = int(
            np.argmax(distribution)
        )  # Greedy best from distribution
        action = self.state_manager.get_action_from_flattened_board_index(
            argmax_distribution_index, self.state_manager.get_state()
        )
        self.perform_action(GameVisualizer.preprocess_action(action))

    def button_clicked(self):
        if self.state_manager.is_end_state():
            return
        try:
            current_player = self.player1 if self.state_manager.current_player() == 1 else self.player2
            if current_player:
                self.model_perform_action(current_player)
            else:
                input_action = (
                    f"{self.action_input.get()}:{self.state_manager.current_player()}"
                )
                if self.random_play:
                    input_action = random.choice(
                        self.state_manager.generate_possible_actions(
                            self.state_manager.get_state()
                        )
                    )
                self.perform_action(GameVisualizer.preprocess_action(input_action))
            self.action_input.delete(0, "end")
        except ValueError:
            self.label["text"] = "Something went wrong"
        if self.state_manager.is_end_state():
            self.label["text"] = "Game over"

    def draw_initial_state(self):
        initial_board = self.state_manager.build_board(self.initial_state)
        for row_index, row in enumerate(initial_board):
            for col_index, player in enumerate(row):
                if player:
                    self.player_pieces.append(
                        Cell(
                            self.canvas,
                            self.board[row_index][col_index].top,
                            player=player,
                        )
                    )

    def get_canvas_position(self, position: (int, int)) -> (int, int):
        x, y = self.start_pos
        x += self.size * 2 * position[1] + self.size * position[0]
        y += (self.size + self.size / 1.7) * position[0]
        return x, y

    def build_and_draw_board(self):
        for i in range(self.board_size):
            row = []
            for j in range(self.board_size):
                row.append(
                    Cell(
                        self.canvas,
                        self.get_canvas_position((i, j)),
                        draw_on_init=False,
                    )
                )
            self.board.append(row)
        self.draw_board_border()
        for row_index, row in enumerate(self.board):
            for col_index, cell in enumerate(row):
                self.board[row_index][col_index].draw()

    def get_column(self, target_col_index: int):
        column = []
        for row_index, row in enumerate(self.board):
            for col_index, cell in enumerate(row):
                if target_col_index == col_index:
                    column.append(cell)
        return column

    def draw_board_border(self):
        borders = []
        first_row = self.board[0]
        borders.append(
            self.canvas.create_polygon(
                first_row[0].left1[0],
                first_row[0].left1[1],
                first_row[0].left1[0] - self.border_size,
                first_row[0].left1[1] - self.border_size,
                first_row[0].left1[0],
                first_row[0].left1[1] - 2 * self.border_size,
                first_row[-1].top[0] + 2 * self.border_size,
                first_row[-1].right1[1] - 2 * self.border_size,
                first_row[-1].top[0],
                (first_row[-1].right1[1] + first_row[-1].right2[1]) / 2,
                fill=PLAYER_ONE_COLOR,
            )
        )
        [
            borders.append(
                self.canvas.create_text(
                    cell.top[0] - 1.5 * self.border_size,
                    cell.top[1],
                    text=str(i) if self.cartesian_cords else chr(65 + i),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
            for i, cell in enumerate(first_row)
        ]
        first_column = self.get_column(0)
        borders.append(
            self.canvas.create_polygon(
                first_column[0].left1[0],
                first_column[0].left1[1],
                first_column[0].left1[0] - self.border_size,
                first_column[0].left1[1] - self.border_size,
                first_column[0].left1[0] - 2 * self.border_size,
                first_column[0].left1[1],
                first_column[-1].bottom[0] - 2 * self.border_size,
                first_column[-1].left2[1] + 2 * self.border_size,
                first_column[-1].bottom[0],
                (first_column[-1].left2[1] + first_column[-1].left1[1]) / 2,
                fill=PLAYER_TWO_COLOR,
            )
        )
        [
            borders.append(
                self.canvas.create_text(
                    cell.left1[0] - self.border_size / 1.5,
                    (cell.left1[1] + cell.left2[1]) / 2,
                    text=str(i) if self.cartesian_cords else str(i + 1),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
            for i, cell in enumerate(first_column)
        ]
        last_row = self.board[-1]
        borders.append(
            self.canvas.create_polygon(
                last_row[0].bottom[0],
                (last_row[0].right1[1] + last_row[0].right2[1]) / 2,
                last_row[0].bottom[0] - 2 * self.border_size,
                last_row[0].right2[1] + 2 * self.border_size,
                last_row[-1].right2[0],
                last_row[-1].right2[1] + 2 * self.border_size,
                last_row[-1].right2[0] + self.border_size,
                last_row[-1].right2[1] + self.border_size,
                last_row[-1].right2[0],
                last_row[-1].right2[1],
                fill=PLAYER_ONE_COLOR,
            )
        )
        [
            borders.append(
                self.canvas.create_text(
                    cell.bottom[0] + 1.2 * self.border_size,
                    cell.bottom[1],
                    text=str(i) if self.cartesian_cords else chr(65 + i),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
            for i, cell in enumerate(last_row)
        ]
        last_column = self.get_column(self.board_size - 1)
        borders.append(
            # Bottom up
            self.canvas.create_polygon(
                last_column[-1].right2[0],
                last_column[-1].right2[1],
                last_column[-1].right2[0] + self.border_size,
                last_column[-1].right2[1] + self.border_size,
                last_column[-1].right2[0] + 2 * self.border_size,
                last_column[-1].right2[1],
                last_column[0].top[0] + 2 * self.border_size,
                last_column[0].right1[1] - 2 * self.border_size,
                last_column[0].top[0],
                (last_column[0].right1[1] + last_column[0].right2[1]) / 2,
                fill=PLAYER_TWO_COLOR,
            )
        )
        [
            borders.append(
                self.canvas.create_text(
                    cell.right1[0] + self.border_size / 1.5,
                    (cell.left1[1] + cell.left2[1]) / 2,
                    text=str(i) if self.cartesian_cords else str(i + 1),
                    fill=FONT_COLOR,
                    font=(FONT, FONT_SIZE),
                )
            )
            for i, cell in enumerate(last_column)
        ]

        return borders

    def get_board_pos(self, pos: (int, int)):
        return self.board_size * pos[0] + pos[1]

    def get_cords(self, board_pos: int):
        return math.floor(board_pos / self.board_size), board_pos % self.board_size

    def draw(self):
        self.perform_action(self.actions.pop(0))
        if len(self.actions) > 0:
            self.master.after(self.frame_rate, self.draw)

    def perform_action(self, action: (int, int, int)):
        print(action)
        x_pos, y_pos, player = action
        self.player_pieces.append(
            Cell(self.canvas, self.board[x_pos][y_pos].top, player=player,)
        )
        self.state_manager.perform_action(f"{x_pos},{y_pos}:{player}")
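
A hedged usage sketch (the board size, actions and frame rate are assumptions): actions queued with add_action use the "x,y:player" string format that preprocess_action expects.

visualizer = GameVisualizer(board_size=4, starting_player=1, frame_rate=500)
visualizer.add_action("0,0:1")  # player 1 places a piece at row 0, column 0
visualizer.add_action("1,1:2")  # player 2 places a piece at row 1, column 1
visualizer.run()                # replays the queued actions, then takes moves from the entry field
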
Example #4
class TOPP:
    def __init__(self, path: str, verbose=False):

        self.models = ANET.load_models(path)
        self.state_manager = None
        self.board_size = ANET.infer_board_size_from_model(self.models[0].model)
        self.verbose = verbose

    def play(self, num_games_per_match):
        """
        Plays out the tournament where all models are played against each other
        :param num_games_per_match: number of games to be played internally for each match
        """
        # Each row shows how many games model_{row_index} has won against each model_{col_index};
        # each column therefore shows how many games model_{col_index} has lost against each model_{row_index}.
        score_matrix = np.zeros((len(self.models), len(self.models)), dtype=int)
        for index1, player1 in enumerate(self.models):
            for index2, player2 in enumerate(self.models[index1 + 1 :]):
                if self.verbose:
                    print(player1.episode_number)
                    print(player2.episode_number)
                wins_p1, wins_p2 = self.play_match(
                    num_games_per_match, player1, player2
                )
                score_matrix[index1, index2 + index1 + 1] += wins_p1
                score_matrix[index2 + index1 + 1, index1] += wins_p2
        self.display_result(score_matrix)

    def play_match(self, num_games_per_match, player1, player2):
        """
        Runs num_games_per_match games between player1 and player2 where the greedy action is chosen.
        Players start every other game.
        :param num_games_per_match: number of games to be played between two models
        :param player1: Keras NN trained on x number of episodes
        :param player2: Keras NN trained on y number of episodes
        :return: the number of wins for each player
        """
        wins_p1 = 0
        wins_p2 = 0
        starting_player = 1
        for i in range(0, num_games_per_match):
            self.state_manager = StateManager(
                board_size=self.board_size, starting_player=starting_player
            )
            while not self.state_manager.is_end_state():
                current_player = self.state_manager.current_player()
                model = player1 if current_player == 1 else player2
                state = self.state_manager.get_state()
                if self.verbose:
                    print(self.state_manager.pretty_state_string())
                distribution = model.predict(state)
                if self.verbose:
                    for k in range(0, self.board_size):
                        print(
                            [
                                distribution[j]
                                for j in range(
                                    self.board_size * k,
                                    self.board_size * k + self.board_size,
                                )
                            ]
                        )
                argmax_distribution_index = int(
                    np.argmax(distribution)
                )  # Greedy best from distribution
                action = self.state_manager.get_action_from_flattened_board_index(
                    argmax_distribution_index, state
                )
                self.state_manager.perform_action(action)
            if current_player == 1:
                wins_p1 += 1
            else:
                wins_p2 += 1
            starting_player = 1 if starting_player == 2 else 2

        return wins_p1, wins_p2

    def display_result(self, score_matrix):
        """
        Displays the score_matrix as a table
        :param score_matrix: np.array
        """
        header = ["wins \ losses"]
        for model in self.models:
            header.append(model.episode_number)
        header.append("sum")
        t = PrettyTable(header)
        x_axis = []
        y_axis = []
        for index, row in enumerate(score_matrix):
            line = [self.models[index].episode_number]
            x_axis.append(self.models[index].episode_number)
            for cell in row:
                line.append(cell)
            line.append(sum(line[1:]))
            y_axis.append(sum(line[1:-1]))
            t.add_row(line)
        print(t)
        plt.clf()
        plt.plot(x_axis, y_axis)
        plt.title('TOPP')
        plt.ylabel('Number of games won')
        plt.xlabel('Episode saved')
        plt.show()
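
A hedged usage sketch; the models directory is a placeholder path and assumes ANET.load_models can read saved models from it.

tournament = TOPP("saved_models", verbose=False)
tournament.play(num_games_per_match=10)  # round-robin, prints a PrettyTable and plots total wins per model
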
Example #5
class MCTS:
    def __init__(
        self,
        state_manager: StateManager,
        actor_net,
        max_tree_height=5,
        c=1,
        number_of_simulations=10,
        verbose=False,
        random_simulation_rate=0.2,
    ):
        self.state_manager = StateManager(state_manager.board_size,
                                          state_manager.current_player())
        self.tree = StateTree(self.state_manager.get_state())
        self.tree.add_state_node(self.tree.root_state,
                                 self.state_manager.is_end_state())
        self.c = c
        self.max_tree_height = max_tree_height
        self.actor_net = actor_net
        self.number_of_simulations = number_of_simulations
        self.verbose = verbose
        self.random_simulation_rate = random_simulation_rate

    def run(self, root_state: str, progress: float):
        """
        Main method: Runs the monte carlo tree search algorithm, tree traversal -> rollout -> backprop, m times.
        Then finds the greedy best move from root state of the current tree
        :param root_state: state to run the algorithm from -> root node
        :return: the greedy best action from root node of the current tree
        """
        self.tree.cut_tree_with_new_root_node(root_state)
        self.state_manager.set_state_manager(self.tree.root_state)
        for i in range(self.number_of_simulations):
            rollout_state = self.traverse_tree(self.tree.root_state, depth=0)
            simulation_reward = self.simulate(rollout_state)
            self.backpropagate(rollout_state, simulation_reward)
            self.state_manager.set_state_manager(self.tree.root_state)

        distribution = self.get_distribution(self.tree.root_state)
        self.actor_net.add_case(self.tree.root_state, distribution.copy())
        if random.random() > math.tanh(progress):
            chosen_action = self.choose_action_stochastically(
                np.array(distribution), self.tree.root_state)
        else:
            chosen_action = self.epsilon_greedy_action_from_distribution(
                np.array(distribution), self.tree.root_state, epsilon=0.0)
        if self.verbose:
            print("distribution", distribution)
            print("chosen_action", chosen_action)
        return chosen_action

    # MAIN ALGORITHM METHODS
    def traverse_tree(self, state: str, depth: int) -> str:
        """
        Traverses the tree, expanding nodes using the tree policy (tree_policy)
        :param state: current state
        :param depth: current depth of the tree
        :return: chosen state to simulate from
        """
        if depth == self.max_tree_height or self.tree.is_end_state(state):
            return state
        # If the current state has not explored its children yet: add them all to the graph and choose one to simulate from
        elif not self.tree.get_outgoing_edges(state):
            children = self.expand(state)
            return self.choose_random_child(state, children)
        else:
            child = self.tree_policy(state)
            self.state_manager.check_difference_and_perform_action(child)
            if self.tree.get_state_number_of_visits(child) == 0:
                self.tree.set_end_state(child,
                                        self.state_manager.is_end_state())
            return self.traverse_tree(child, depth + 1)

    def expand(self, state) -> [str]:
        """
        Expanding all child nodes from the input state and adding them to the graph.
        :param state: state to find all children from
        :return: list of all child states
        """
        children = StateManager.generate_child_states(state)
        for child in children:
            if child not in self.tree.get_nodes():
                self.tree.add_state_node(child)
            self.tree.add_edge(state, child)
        return children

    def simulate(self, state: str):
        """
        Performs one roll-out using the actor net as policy
        :return: return 1 if the simulation ends in player "true" winning, -1 otherwise
        """
        if self.state_manager.get_state() != state:
            raise ValueError(
                "The state manager is not set to the start of the simulation")
        while not self.state_manager.is_end_state():
            if random.random() < self.random_simulation_rate:
                distribution = self.actor_net.predict(
                    self.state_manager.get_state())
                chosen_action = self.epsilon_greedy_action_from_distribution(
                    distribution, self.state_manager.get_state(), epsilon=0.0)
            else:
                chosen_action = random.choice(
                    self.state_manager.generate_possible_actions(
                        self.state_manager.get_state()))
            self.state_manager.perform_action(chosen_action)
        return MCTS.get_end_state_reward(self.state_manager.current_player())

    def backpropagate(self, state: str, simulation_reward: int):
        """
        Starts at the rollout start state and walks up the tree, updating the nodes' SAP values and visit counts
        :param state: rollout start state
        :param simulation_reward: reward from simulation
        """
        if state == self.tree.root_state:
            self.tree.increment_state_number_of_visits(state)
            return
        parent_state = self.tree.get_parent(state)

        self.tree.increment_state_number_of_visits(state)
        self.tree.increment_edge_number_of_visits(parent_state, state)
        edge_times_enc = self.tree.get_edge_number_of_visits(
            parent_state, state)
        edge_sap_value = self.tree.get_sap_value(parent_state, state)
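        # Incremental mean update: Q(s, a) <- Q(s, a) + (reward - Q(s, a)) / N(s, a)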
        new_sap_value = (self.tree.get_sap_value(parent_state, state) +
                         (simulation_reward - edge_sap_value) / edge_times_enc)
        self.tree.set_sap_value(parent_state, state, new_sap_value)
        self.tree.set_active_edge(parent_state, state, False)

        self.backpropagate(parent_state, simulation_reward)

    # HELPER METHODS

    def tree_policy(self, state: str) -> str:
        """
        Uses the UCT score to determine the child state of an input state
        :param state: input state
        :return: child state
        """
        state_number_of_visits = self.tree.get_state_number_of_visits(state)
        if self.state_manager.get_player(state) == 1:
            best_edge = self.tree.get_outgoing_edges(
                state,
                sort_by_function=lambda edge: self.compute_uct(
                    self.tree.get_sap_value(*edge),
                    state_number_of_visits,
                    self.tree.get_edge_number_of_visits(*edge),
                    True,
                ),
            )[0]
        else:
            best_edge = self.tree.get_outgoing_edges(
                state,
                sort_by_function=lambda edge: self.compute_uct(
                    self.tree.get_sap_value(*edge),
                    state_number_of_visits,
                    self.tree.get_edge_number_of_visits(*edge),
                    False,
                ),
            )[-1]
        parent, best_child = best_edge
        self.tree.set_active_edge(parent, best_child, True)
        return best_child

    def compute_uct(
        self,
        sap_value: float,
        number_of_visits_node: int,
        number_of_visits_edge: int,
        maximizing_player: bool,
    ) -> float:
        """
        Computes the UCT value for the tree policy
        :param sap_value: sap value for the edge
        :param number_of_visits_node: number of visits for the parent state
        :param number_of_visits_edge: number of visits for the edge between the two nodes
        :param maximizing_player: if the current player is the maximizing player
        :return: uct value
        """
        uct = sap_value
        usa_term = self.c * math.sqrt(
            math.log(number_of_visits_node) / (1 + number_of_visits_edge))
        if maximizing_player:
            uct += usa_term
        else:
            uct -= usa_term
        return uct

    def greedy_best_action(self, state: str) -> str:
        sorted_list = self.tree.get_outgoing_edges(
            state,
            sort_by_function=lambda edge: self.tree.get_edge_number_of_visits(
                *edge),
        )
        return self.state_manager.get_action(*sorted_list[0])

    def choose_random_child(self, parent_state: str, child_list: [str]) -> str:
        """
        Helper method choosing a random state from the child list, updating state manager
        and adding edge and node parameters
        :param parent_state: parent state for the child list (to set edge parameters)
        :param child_list: list of children from parent state
        :return: chosen child
        """
        child = random.choice(child_list)
        self.state_manager.check_difference_and_perform_action(child)
        self.tree.set_end_state(child, self.state_manager.is_end_state())
        self.tree.set_active_edge(parent_state, child, True)
        return child

    def epsilon_greedy_action_from_distribution(self,
                                                distribution: np.ndarray,
                                                state: str,
                                                epsilon=0.2):
        """
        Chooses an epsilon-greedy index from the distribution and converts that index to an action
        :param distribution: distribution from number of simulations per node
        :param state: current state to calculate action
        :param epsilon: the epsilon value to be used
        :return: action string
        """
        if random.random() > epsilon:
            chosen_index = int(np.argmax(distribution))
        else:
            # Choose random state from those with positive probability
            # prob == 0 might be occupied cells on the board
            if not [
                    i[0]
                    for i, prob in np.ndenumerate(distribution) if prob > 0
            ]:
                chosen_index = int(np.argmax(distribution))
            else:
                chosen_index = random.choice([
                    i[0] for i, prob in np.ndenumerate(distribution)
                    if prob > 0
                ])
        return self.state_manager.get_action_from_flattened_board_index(
            chosen_index, state)

    @staticmethod
    def get_end_state_reward(current_player: int) -> int:
        """
        We have chosen player 1 to be "us", giving a positive reward if player 1 wins.
        :param current_player: current player for the state manager
        :return: reward for end state
        """
        return -1 if current_player == 1 else 1

    def get_distribution(self, state: str):
        """
        Returns the distribution of total visits for child nodes of input state
        :param state: state to get distribution from
        :return: a normalized list of length equal to the total number of positions on the board
        """

        parent_board, parent_player = StateManager.extract_state(state)
        child_states = self.tree.get_child_states(state)
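        # Maps the board index where a child differs from the parent (the placed piece)
        # to the visit count of the corresponding edge, so the distribution can be normalized below.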
        change_indices_dict = {}
        total_visits = 0
        for child in child_states:
            child_board, child_player = StateManager.extract_state(child)
            for i in range(len(child_board)):
                if parent_board[i] != child_board[i]:
                    child_number_of_visits = self.tree.get_edge_number_of_visits(
                        state, child)
                    change_indices_dict[i] = child_number_of_visits
                    total_visits += child_number_of_visits
                    break

        return [
            change_indices_dict[index] /
            total_visits if index in change_indices_dict else 0
            for index in range(self.state_manager.board_size**2)
        ]

    def set_random_simulation_rate(self, new_rate: float):
        self.random_simulation_rate = new_rate

    def choose_action_stochastically(self, distribution, state):
        chosen_index = np.random.choice([i for i in range(len(distribution))],
                                        p=distribution)
        return self.state_manager.get_action_from_flattened_board_index(
            chosen_index, state)
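
A hedged sketch of how the class could be driven per move; actor_net (assumed to be a trained project ANET), the board size and the exact meaning of progress are assumptions, and only the MCTS/StateManager calls visible above are used.

state_manager = StateManager(board_size=4, starting_player=1)
mcts = MCTS(state_manager, actor_net, number_of_simulations=100)

progress = 0.0
while not state_manager.is_end_state():
    action = mcts.run(state_manager.get_state(), progress)
    state_manager.perform_action(action)
    progress += 1 / 16  # assumed to grow toward 1 over the game, making the chosen move more greedy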